Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp2
-rw-r--r--lib/Analysis/BranchProbabilityInfo.cpp2
-rw-r--r--lib/Analysis/CGSCCPassManager.cpp206
-rw-r--r--lib/Analysis/CaptureTracking.cpp4
-rw-r--r--lib/Analysis/DemandedBits.cpp15
-rw-r--r--lib/Analysis/DependenceAnalysis.cpp7
-rw-r--r--lib/Analysis/InstructionSimplify.cpp15
-rw-r--r--lib/Analysis/LazyCallGraph.cpp20
-rw-r--r--lib/Analysis/Lint.cpp2
-rw-r--r--lib/Analysis/LoopInfo.cpp6
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp41
-rw-r--r--lib/Analysis/ModuleSummaryAnalysis.cpp2
-rw-r--r--lib/Analysis/ScalarEvolution.cpp20
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp25
-rw-r--r--lib/Analysis/ValueTracking.cpp53
-rw-r--r--lib/Analysis/VectorUtils.cpp2
-rw-r--r--lib/AsmParser/LLLexer.cpp2
-rw-r--r--lib/AsmParser/LLParser.cpp74
-rw-r--r--lib/AsmParser/LLParser.h3
-rw-r--r--lib/AsmParser/LLToken.h2
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp92
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp64
-rw-r--r--lib/CodeGen/AtomicExpandPass.cpp8
-rw-r--r--lib/CodeGen/CodeGen.cpp1
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp37
-rw-r--r--lib/CodeGen/GlobalISel/IRTranslator.cpp4
-rw-r--r--lib/CodeGen/GlobalISel/InstructionSelector.cpp7
-rw-r--r--lib/CodeGen/GlobalISel/LegalizerHelper.cpp36
-rw-r--r--lib/CodeGen/GlobalISel/MachineIRBuilder.cpp42
-rw-r--r--lib/CodeGen/LiveRegUnits.cpp2
-rw-r--r--lib/CodeGen/MIRParser/MILexer.cpp10
-rw-r--r--lib/CodeGen/MIRParser/MILexer.h6
-rw-r--r--lib/CodeGen/MIRParser/MIParser.cpp85
-rw-r--r--lib/CodeGen/MIRPrinter.cpp54
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp37
-rw-r--r--lib/CodeGen/MachineFunction.cpp12
-rw-r--r--lib/CodeGen/MachineInstr.cpp20
-rw-r--r--lib/CodeGen/MachineVerifier.cpp8
-rw-r--r--lib/CodeGen/MacroFusion.cpp2
-rw-r--r--lib/CodeGen/PostRAHazardRecognizer.cpp2
-rw-r--r--lib/CodeGen/RegAllocFast.cpp13
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp2
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp2
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp28
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp15
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp91
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp199
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp20
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp93
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp16
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h20
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp55
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp28
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp134
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h6
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp9
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp7
-rw-r--r--lib/CodeGen/SplitKit.cpp8
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp54
-rw-r--r--lib/DebugInfo/CodeView/SymbolDumper.cpp91
-rw-r--r--lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp12
-rw-r--r--lib/DebugInfo/DWARF/DWARFContext.cpp124
-rw-r--r--lib/DebugInfo/DWARF/DWARFDie.cpp2
-rw-r--r--lib/DebugInfo/PDB/CMakeLists.txt2
-rw-r--r--lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp6
-rw-r--r--lib/DebugInfo/PDB/Native/DbiStream.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp26
-rw-r--r--lib/DebugInfo/PDB/Native/NamedStreamMap.cpp6
-rw-r--r--lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp48
-rw-r--r--lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp2
-rw-r--r--lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp2
-rw-r--r--lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp2
-rw-r--r--lib/DebugInfo/PDB/Native/NativeSession.cpp60
-rw-r--r--lib/DebugInfo/PDB/Native/PDBFile.cpp9
-rw-r--r--lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp27
-rw-r--r--lib/DebugInfo/PDB/Native/PDBStringTable.cpp2
-rw-r--r--lib/DebugInfo/PDB/Native/PublicsStream.cpp16
-rw-r--r--lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp89
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp19
-rw-r--r--lib/ExecutionEngine/Orc/OrcCBindings.cpp35
-rw-r--r--lib/ExecutionEngine/Orc/OrcCBindingsStack.h104
-rw-r--r--lib/ExecutionEngine/Orc/OrcError.cpp21
-rw-r--r--lib/ExecutionEngine/Orc/OrcMCJITReplacement.h23
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp33
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp7
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp2
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp3
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h2
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp3
-rw-r--r--lib/Fuzzer/CMakeLists.txt2
-rw-r--r--lib/Fuzzer/FuzzerCorpus.h81
-rw-r--r--lib/Fuzzer/FuzzerDriver.cpp6
-rw-r--r--lib/Fuzzer/FuzzerExtFunctionsWeak.cpp3
-rw-r--r--lib/Fuzzer/FuzzerFlags.def4
-rw-r--r--lib/Fuzzer/FuzzerIOWindows.cpp4
-rw-r--r--lib/Fuzzer/FuzzerInternal.h8
-rw-r--r--lib/Fuzzer/FuzzerLoop.cpp66
-rw-r--r--lib/Fuzzer/FuzzerOptions.h1
-rw-r--r--lib/Fuzzer/FuzzerUtilDarwin.cpp13
-rw-r--r--lib/Fuzzer/test/CMakeLists.txt3
-rw-r--r--lib/Fuzzer/test/FuzzerUnittest.cpp5
-rw-r--r--lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp19
-rw-r--r--lib/Fuzzer/test/reduce_inputs.test13
-rw-r--r--lib/IR/AsmWriter.cpp66
-rw-r--r--lib/IR/CMakeLists.txt1
-rw-r--r--lib/IR/ConstantFold.cpp38
-rw-r--r--lib/IR/Constants.cpp73
-rw-r--r--lib/IR/Core.cpp22
-rw-r--r--lib/IR/Instruction.cpp11
-rw-r--r--lib/IR/Instructions.cpp74
-rw-r--r--lib/IR/LLVMContext.cpp20
-rw-r--r--lib/IR/LLVMContextImpl.cpp14
-rw-r--r--lib/IR/LLVMContextImpl.h14
-rw-r--r--lib/IR/Module.cpp4
-rw-r--r--lib/IR/SafepointIRVerifier.cpp437
-rw-r--r--lib/IR/Type.cpp2
-rw-r--r--lib/IR/Verifier.cpp96
-rw-r--r--lib/LTO/LTO.cpp11
-rw-r--r--lib/Linker/IRMover.cpp18
-rw-r--r--lib/MC/ELFObjectWriter.cpp10
-rw-r--r--lib/MC/MCAssembler.cpp16
-rw-r--r--lib/MC/MachObjectWriter.cpp2
-rw-r--r--lib/MC/WasmObjectWriter.cpp161
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp11
-rw-r--r--lib/Object/WasmObjectFile.cpp40
-rw-r--r--lib/Object/WindowsResource.cpp4
-rw-r--r--lib/ObjectYAML/WasmYAML.cpp3
-rw-r--r--lib/Option/OptTable.cpp8
-rw-r--r--lib/Passes/PassBuilder.cpp262
-rw-r--r--lib/ProfileData/InstrProf.cpp46
-rw-r--r--lib/ProfileData/InstrProfReader.cpp20
-rw-r--r--lib/ProfileData/InstrProfWriter.cpp43
-rw-r--r--lib/Support/CommandLine.cpp2
-rw-r--r--lib/Support/DynamicLibrary.cpp43
-rw-r--r--lib/Support/ErrorHandling.cpp62
-rw-r--r--lib/Support/Host.cpp405
-rw-r--r--lib/Support/Mutex.cpp5
-rw-r--r--lib/Support/Unix/DynamicLibrary.inc3
-rw-r--r--lib/Support/Unix/Host.inc25
-rw-r--r--lib/Support/Unix/Program.inc3
-rw-r--r--lib/Support/Windows/DynamicLibrary.inc2
-rw-r--r--lib/Support/Windows/Host.inc4
-rw-r--r--lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp2
-rw-r--r--lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp2
-rw-r--r--lib/Target/AArch64/AArch64CondBrTuning.cpp2
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp2
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp8
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp32
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h6
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td11
-rw-r--r--lib/Target/AArch64/AArch64InstructionSelector.cpp13
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.cpp5
-rw-r--r--lib/Target/AArch64/AArch64RedundantCopyElimination.cpp1
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp5
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h7
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp6
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp9
-rw-r--r--lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp4
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp13
-rw-r--r--lib/Target/AMDGPU/AMDGPUMacroFusion.cpp64
-rw-r--r--lib/Target/AMDGPU/AMDGPUMacroFusion.h19
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp58
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp47
-rw-r--r--lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp166
-rw-r--r--lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--lib/Target/AMDGPU/GCNIterativeScheduler.cpp2
-rw-r--r--lib/Target/AMDGPU/GCNMinRegStrategy.cpp2
-rw-r--r--lib/Target/AMDGPU/GCNRegPressure.cpp2
-rw-r--r--lib/Target/AMDGPU/GCNSchedStrategy.cpp2
-rw-r--r--lib/Target/AMDGPU/GCNSchedStrategy.h2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp4
-rw-r--r--lib/Target/AMDGPU/MIMGInstructions.td1
-rw-r--r--lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp2
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp3
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.h3
-rw-r--r--lib/Target/AMDGPU/R600MachineScheduler.cpp2
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp1
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp120
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.h3
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp20
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.h8
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td2
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp2
-rw-r--r--lib/Target/AMDGPU/SIShrinkInstructions.cpp78
-rw-r--r--lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/VOP3PInstructions.td28
-rw-r--r--lib/Target/AMDGPU/VOPInstructions.td18
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp1
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp3
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp19
-rw-r--r--lib/Target/ARM/ARMCallLowering.cpp2
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp20
-rw-r--r--lib/Target/ARM/ARMISelLowering.h3
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td2
-rw-r--r--lib/Target/ARM/ARMInstructionSelector.cpp312
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.cpp207
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.h33
-rw-r--r--lib/Target/ARM/ARMRegisterBankInfo.cpp32
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp18
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h36
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp11
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp15
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h8
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp2
-rw-r--r--lib/Target/AVR/AVRAsmPrinter.cpp5
-rw-r--r--lib/Target/AVR/AVRDevices.td23
-rw-r--r--lib/Target/AVR/AVRInstrInfo.cpp72
-rw-r--r--lib/Target/AVR/AVRInstrInfo.h4
-rw-r--r--lib/Target/AVR/AVRInstrInfo.td32
-rw-r--r--lib/Target/AVR/AVRMCInstLower.cpp16
-rw-r--r--lib/Target/AVR/AVRRegisterInfo.cpp11
-rw-r--r--lib/Target/AVR/AVRRegisterInfo.td7
-rw-r--r--lib/Target/AVR/AVRTargetMachine.cpp6
-rw-r--r--lib/Target/AVR/AsmParser/AVRAsmParser.cpp1
-rw-r--r--lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp2
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp4
-rw-r--r--lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp1
-rw-r--r--lib/Target/Hexagon/HexagonBitSimplify.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.cpp1
-rw-r--r--lib/Target/Hexagon/HexagonConstPropagation.cpp1
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp14
-rw-r--r--lib/Target/Hexagon/HexagonGenPredicate.cpp1
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp46
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp56
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h27
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp74
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h21
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonPatterns.td165
-rw-r--r--lib/Target/Hexagon/HexagonPseudo.td10
-rw-r--r--lib/Target/Hexagon/HexagonSplitDouble.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp10
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp12
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h13
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp2
-rw-r--r--lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp1
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp4
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp282
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp32
-rw-r--r--lib/Target/Mips/Mips.td2
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td11
-rw-r--r--lib/Target/Mips/MipsMTInstrFormats.td99
-rw-r--r--lib/Target/Mips/MipsMTInstrInfo.td208
-rw-r--r--lib/Target/Mips/MipsSchedule.td18
-rw-r--r--lib/Target/Mips/MipsScheduleGeneric.td14
-rw-r--r--lib/Target/Mips/MipsScheduleP5600.td2
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp3
-rw-r--r--lib/Target/Mips/MipsSubtarget.h4
-rw-r--r--lib/Target/Mips/MipsTargetStreamer.h9
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp3
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp35
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp2
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp5
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp36
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp43
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp177
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h7
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td191
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td190
-rw-r--r--lib/Target/PowerPC/PPCScheduleP9.td4
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h7
-rw-r--r--lib/Target/PowerPC/PPCVSXSwapRemoval.cpp4
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp3
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp4
-rw-r--r--lib/Target/SystemZ/SystemZHazardRecognizer.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp97
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h15
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td76
-rw-r--r--lib/Target/SystemZ/SystemZLDCleanup.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZMachineScheduler.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td18
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ13.td214
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ196.td4
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZEC12.td4
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp5
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp28
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h8
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGSort.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp41
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp4
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp2
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp56
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h8
-rw-r--r--lib/Target/X86/X86.td1
-rw-r--r--lib/Target/X86/X86CallLowering.cpp47
-rw-r--r--lib/Target/X86/X86CallLowering.h2
-rw-r--r--lib/Target/X86/X86CallingConv.td10
-rw-r--r--lib/Target/X86/X86FastISel.cpp3
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp5
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp537
-rw-r--r--lib/Target/X86/X86ISelLowering.h13
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp2
-rw-r--r--lib/Target/X86/X86InstructionSelector.cpp153
-rw-r--r--lib/Target/X86/X86LegalizerInfo.cpp12
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp195
-rw-r--r--lib/Target/X86/X86SchedSandyBridge.td2472
-rw-r--r--lib/Target/X86/X86ScheduleBtVer2.td77
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp13
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp4
-rw-r--r--lib/Transforms/IPO/FunctionImport.cpp25
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp10
-rw-r--r--lib/Transforms/IPO/Inliner.cpp10
-rw-r--r--lib/Transforms/IPO/LowerTypeTests.cpp11
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp18
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp3
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp78
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp363
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp259
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp172
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp309
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h12
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp81
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp93
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp10
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp190
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp59
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp48
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp164
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp2
-rw-r--r--lib/Transforms/Instrumentation/CFGMST.h12
-rw-r--r--lib/Transforms/Instrumentation/InstrProfiling.cpp157
-rw-r--r--lib/Transforms/Instrumentation/MaximumSpanningTree.h6
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp7
-rw-r--r--lib/Transforms/Instrumentation/PGOInstrumentation.cpp2
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp7
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp57
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp2
-rw-r--r--lib/Transforms/Scalar/GVN.cpp202
-rw-r--r--lib/Transforms/Scalar/InferAddressSpaces.cpp5
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp39
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopInterchange.cpp44
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp20
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp110
-rw-r--r--lib/Transforms/Scalar/MergedLoadStoreMotion.cpp2
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp16
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp2
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp2
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp2
-rw-r--r--lib/Transforms/Scalar/SROA.cpp12
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp2
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp20
-rw-r--r--lib/Transforms/Utils/CmpInstAnalysis.cpp2
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp6
-rw-r--r--lib/Transforms/Utils/Evaluator.cpp2
-rw-r--r--lib/Transforms/Utils/FunctionComparator.cpp18
-rw-r--r--lib/Transforms/Utils/Local.cpp18
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp143
-rw-r--r--lib/Transforms/Utils/LowerMemIntrinsics.cpp288
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp4
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp47
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp4
-rw-r--r--lib/Transforms/Utils/VNCoercion.cpp15
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp9
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp45
374 files changed, 11756 insertions, 4184 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index b52a1d7b24d62..e682a644ef2c1 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1006,7 +1006,7 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// Because they cannot partially overlap and because fields in an array
// cannot overlap, if we can prove the final indices are different between
// GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias.
-
+
// If the last indices are constants, we've already checked they don't
// equal each other so we can exit early.
if (C1 && C2)
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 23d5a887c34af..a329e5ad48c94 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -538,7 +538,7 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
// InstCombine canonicalizes X <= 0 into X < 1.
// X <= 0 -> Unlikely
isProb = false;
- } else if (CV->isAllOnesValue()) {
+ } else if (CV->isMinusOne()) {
switch (CI->getPredicate()) {
case CmpInst::ICMP_EQ:
// X == -1 -> Unlikely
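A rename recurs throughout this commit: value checks move from the generic Constant helpers to the more specific ConstantInt predicates. A minimal sketch of the equivalences, assuming CI is any llvm::ConstantInt:

    #include "llvm/IR/Constants.h"
    using namespace llvm;

    // Sketch of the predicate renames applied across this diff.
    static bool classify(const ConstantInt *CI) {
      bool IsZero = CI->isZero();         // previously CI->isNullValue()
      bool IsOne = CI->isOne();           // previously CI->equalsInt(1)
      bool IsMinusOne = CI->isMinusOne(); // previously CI->isAllOnesValue()
      return IsZero || IsOne || IsMinusOne;
    }

The same substitution shows up below in Lint, LoopInfo, ScalarEvolution, ValueTracking, and VectorUtils.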
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index 9d4521221f477..3ddefc6520a78 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -196,18 +196,117 @@ FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C,
bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
LazyCallGraph::SCC &C, const PreservedAnalyses &PA,
CGSCCAnalysisManager::Invalidator &Inv) {
- for (LazyCallGraph::Node &N : C)
- FAM->invalidate(N.getFunction(), PA);
+ // If literally everything is preserved, we're done.
+ if (PA.areAllPreserved())
+ return false; // This is still a valid proxy.
+
+ // If this proxy isn't marked as preserved, then even if the result remains
+ // valid, the key itself may no longer be valid, so we clear everything.
+ //
+ // Note that in order to preserve this proxy, a module pass must ensure that
+ // the FAM has been completely updated to handle the deletion of functions.
+ // Specifically, any FAM-cached results for those functions need to have been
+ // forcibly cleared. When preserved, this proxy will only invalidate results
+ // cached on functions *still in the module* at the end of the module pass.
+ auto PAC = PA.getChecker<FunctionAnalysisManagerCGSCCProxy>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<LazyCallGraph::SCC>>()) {
+ for (LazyCallGraph::Node &N : C)
+ FAM->clear(N.getFunction());
+
+ return true;
+ }
+
+ // Directly check if the relevant set is preserved.
+ bool AreFunctionAnalysesPreserved =
+ PA.allAnalysesInSetPreserved<AllAnalysesOn<Function>>();
+
+ // Now walk all the functions to see if any inner analysis invalidation is
+ // necessary.
+ for (LazyCallGraph::Node &N : C) {
+ Function &F = N.getFunction();
+ Optional<PreservedAnalyses> FunctionPA;
+
+ // Check to see whether the preserved set needs to be pruned based on
+ // SCC-level analysis invalidation that triggers deferred invalidation
+ // registered with the outer analysis manager proxy for this function.
+ if (auto *OuterProxy =
+ FAM->getCachedResult<CGSCCAnalysisManagerFunctionProxy>(F))
+ for (const auto &OuterInvalidationPair :
+ OuterProxy->getOuterInvalidations()) {
+ AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first;
+ const auto &InnerAnalysisIDs = OuterInvalidationPair.second;
+ if (Inv.invalidate(OuterAnalysisID, C, PA)) {
+ if (!FunctionPA)
+ FunctionPA = PA;
+ for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs)
+ FunctionPA->abandon(InnerAnalysisID);
+ }
+ }
+
+ // Check if we needed a custom PA set, and if so we'll need to run the
+ // inner invalidation.
+ if (FunctionPA) {
+ FAM->invalidate(F, *FunctionPA);
+ continue;
+ }
- // This proxy doesn't need to handle invalidation itself. Instead, the
- // module-level CGSCC proxy handles it above by ensuring that if the
- // module-level FAM proxy becomes invalid the entire SCC layer, which
- // includes this proxy, is cleared.
+ // Otherwise we only need to do invalidation if the original PA set didn't
+ // preserve all function analyses.
+ if (!AreFunctionAnalysesPreserved)
+ FAM->invalidate(F, PA);
+ }
+
+ // Return false to indicate that this result is still a valid proxy.
return false;
}
} // End llvm namespace
+/// When a new SCC is created for the graph and there might be function
+/// analysis results cached for the functions now in that SCC two forms of
+/// updates are required.
+///
+/// First, a proxy from the SCC to the FunctionAnalysisManager needs to be
+/// created so that any subsequent invalidation events to the SCC are
+/// propagated to the function analysis results cached for functions within it.
+///
+/// Second, if any of the functions within the SCC have analysis results with
+/// outer analysis dependencies, then those dependencies would point to the
+/// *wrong* SCC's analysis result. We forcibly invalidate the necessary
+/// function analyses so that they don't retain stale handles.
+static void updateNewSCCFunctionAnalyses(LazyCallGraph::SCC &C,
+ LazyCallGraph &G,
+ CGSCCAnalysisManager &AM) {
+ // Get the relevant function analysis manager.
+ auto &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, G).getManager();
+
+ // Now walk the functions in this SCC and invalidate any function analysis
+ // results that might have outer dependencies on an SCC analysis.
+ for (LazyCallGraph::Node &N : C) {
+ Function &F = N.getFunction();
+
+ auto *OuterProxy =
+ FAM.getCachedResult<CGSCCAnalysisManagerFunctionProxy>(F);
+ if (!OuterProxy)
+ // No outer analyses were queried, nothing to do.
+ continue;
+
+ // Forcibly abandon all the inner analyses with dependencies, but
+ // invalidate nothing else.
+ auto PA = PreservedAnalyses::all();
+ for (const auto &OuterInvalidationPair :
+ OuterProxy->getOuterInvalidations()) {
+ const auto &InnerAnalysisIDs = OuterInvalidationPair.second;
+ for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs)
+ PA.abandon(InnerAnalysisID);
+ }
+
+ // Now invalidate anything we found.
+ FAM.invalidate(F, PA);
+ }
+}
+
namespace {
/// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c
/// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly
@@ -236,7 +335,6 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n";
SCC *OldC = C;
- (void)OldC;
// Update the current SCC. Note that if we have new SCCs, this must actually
// change the SCC.
@@ -245,6 +343,26 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
C = &*NewSCCRange.begin();
assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+ // If we had a cached FAM proxy originally, we will want to create more of
+ // them for each SCC that was split off.
+ bool NeedFAMProxy =
+ AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(*OldC) != nullptr;
+
+ // We need to propagate an invalidation call to all but the newly current SCC
+ // because the outer pass manager won't do that for us after splitting them.
+ // FIXME: We should accept a PreservedAnalyses from the CG updater so that if
+ // there are preserved analyses we can avoid invalidating them here for
+ // split-off SCCs.
+ // We know, however, that this will preserve any FAM proxy, so go ahead and
+ // mark that.
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ AM.invalidate(*OldC, PA);
+
+ // Ensure the now-current SCC's function analyses are updated.
+ if (NeedFAMProxy)
+ updateNewSCCFunctionAnalyses(*C, G, AM);
+
for (SCC &NewC :
reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) {
assert(C != &NewC && "No need to re-visit the current SCC!");
@@ -252,6 +370,14 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
UR.CWorklist.insert(&NewC);
if (DebugLogging)
dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n";
+
+ // Ensure new SCCs' function analyses are updated.
+ if (NeedFAMProxy)
+ updateNewSCCFunctionAnalyses(NewC, G, AM);
+
+ // Also propagate a normal invalidation to the new SCC, as only the current
+ // SCC will get one from the pass manager infrastructure.
+ AM.invalidate(NewC, PA);
}
return C;
}
@@ -349,14 +475,6 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// For separate SCCs this is trivial.
RC->switchTrivialInternalEdgeToRef(N, TargetN);
} else {
- // Otherwise we may end up re-structuring the call graph. First,
- // invalidate any SCC analyses. We have to do this before we split
- // functions into new SCCs and lose track of where their analyses are
- // cached.
- // FIXME: We should accept a more precise preserved set here. For
- // example, it might be possible to preserve some function analyses
- // even as the SCC structure is changed.
- AM.invalidate(*C, PreservedAnalyses::none());
// Now update the call graph.
C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
N, C, AM, UR, DebugLogging);
@@ -424,13 +542,6 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
continue;
}
- // Otherwise we may end up re-structuring the call graph. First, invalidate
- // any SCC analyses. We have to do this before we split functions into new
- // SCCs and lose track of where their analyses are cached.
- // FIXME: We should accept a more precise preserved set here. For example,
- // it might be possible to preserve some function analyses even as the SCC
- // structure is changed.
- AM.invalidate(*C, PreservedAnalyses::none());
// Now update the call graph.
C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N,
C, AM, UR, DebugLogging);
@@ -459,25 +570,48 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// Otherwise we are switching an internal ref edge to a call edge. This
// may merge away some SCCs, and we add those to the UpdateResult. We also
// need to make sure to update the worklist in the event SCCs have moved
- // before the current one in the post-order sequence.
+ // before the current one in the post-order sequence
+ bool HasFunctionAnalysisProxy = false;
auto InitialSCCIndex = RC->find(*C) - RC->begin();
- auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, *CallTarget);
- if (!InvalidatedSCCs.empty()) {
+ bool FormedCycle = RC->switchInternalEdgeToCall(
+ N, *CallTarget, [&](ArrayRef<SCC *> MergedSCCs) {
+ for (SCC *MergedC : MergedSCCs) {
+ assert(MergedC != &TargetC && "Cannot merge away the target SCC!");
+
+ HasFunctionAnalysisProxy |=
+ AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(
+ *MergedC) != nullptr;
+
+ // Mark that this SCC will no longer be valid.
+ UR.InvalidatedSCCs.insert(MergedC);
+
+ // FIXME: We should really do a 'clear' here to forcibly release
+ // memory, but we don't have a good way of doing that and
+ // preserving the function analyses.
+ auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ AM.invalidate(*MergedC, PA);
+ }
+ });
+
+ // If we formed a cycle by creating this call, we need to update more data
+ // structures.
+ if (FormedCycle) {
C = &TargetC;
assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
- // Any analyses cached for this SCC are no longer precise as the shape
- // has changed by introducing this cycle.
- AM.invalidate(*C, PreservedAnalyses::none());
-
- for (SCC *InvalidatedC : InvalidatedSCCs) {
- assert(InvalidatedC != C && "Cannot invalidate the current SCC!");
- UR.InvalidatedSCCs.insert(InvalidatedC);
+ // If one of the invalidated SCCs had a cached proxy to a function
+ // analysis manager, we need to create a proxy in the new current SCC as
+ // the invalidated SCCs had their functions moved.
+ if (HasFunctionAnalysisProxy)
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G);
- // Also clear any cached analyses for the SCCs that are dead. This
- // isn't really necessary for correctness but can release memory.
- AM.clear(*InvalidatedC);
- }
+ // Any analyses cached for this SCC are no longer precise as the shape
+ // has changed by introducing this cycle. However, we have taken care to
+ // update the proxies so they remain valid.
+ auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ AM.invalidate(*C, PA);
}
auto NewSCCIndex = RC->find(*C) - RC->begin();
if (InitialSCCIndex < NewSCCIndex) {
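The hunks above all lean on one preservation pattern: invalidate SCC-level results whose shape-dependent conclusions are stale, while keeping function-level results (and the FAM proxy that guards them) alive. A minimal sketch, assuming AM and C come from a CGSCC pass run:

    #include "llvm/Analysis/CGSCCPassManager.h"
    using namespace llvm;

    // Invalidate SCC-level analyses for C but preserve every function-level
    // result; preserving the proxy tells it not to clear the FAM entries.
    static void invalidateSCCShapeOnly(CGSCCAnalysisManager &AM,
                                       LazyCallGraph::SCC &C) {
      auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
      PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
      AM.invalidate(C, PA);
    }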
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 2093f0fdec123..3b0026ba10e90 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -94,8 +94,8 @@ namespace {
// guarantee that 'I' never reaches 'BeforeHere' through a back-edge or
// by its successors, i.e, prune if:
//
- // (1) BB is an entry block or have no sucessors.
- // (2) There's no path coming back through BB sucessors.
+ // (1) BB is the entry block or has no successors.
+ // (2) There's no path coming back through BB's successors.
if (BB == &BB->getParent()->getEntryBlock() ||
!BB->getTerminator()->getNumSuccessors())
return true;
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 926b28d6094a5..9c53f9140ca33 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -143,9 +143,8 @@ void DemandedBits::determineLiveOperandBits(
break;
case Instruction::Shl:
if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.lshr(ShiftAmt);
// If the shift is nuw/nsw, then the high bits are not dead
@@ -159,9 +158,8 @@ void DemandedBits::determineLiveOperandBits(
break;
case Instruction::LShr:
if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
// If the shift is exact, then the low bits are not dead
@@ -172,9 +170,8 @@ void DemandedBits::determineLiveOperandBits(
break;
case Instruction::AShr:
if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
// Because the high input bit is replicated into the
// high-order bits of the result, if we need any of those
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index e4d58bf1b4eb1..34eccc07f2655 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -3342,7 +3342,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
- (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands()) &&
+ isKnownPredicate(CmpInst::ICMP_EQ, SrcPtrSCEV, DstPtrSCEV);
}
unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
@@ -3371,7 +3372,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
if (Delinearize && CommonLevels > 1) {
if (tryDelinearize(Src, Dst, Pair)) {
- DEBUG(dbgs() << " delinerized GEP\n");
+ DEBUG(dbgs() << " delinearized GEP\n");
Pairs = Pair.size();
}
}
@@ -3796,7 +3797,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
if (Delinearize && CommonLevels > 1) {
if (tryDelinearize(Src, Dst, Pair)) {
- DEBUG(dbgs() << " delinerized GEP\n");
+ DEBUG(dbgs() << " delinearized GEP\n");
Pairs = Pair.size();
}
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index d9e32a3c417e0..f6632020b8fc7 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -560,7 +560,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return Y;
/// i1 add -> xor.
- if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1))
+ if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
@@ -598,7 +598,7 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// folding.
static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
bool AllowNonInbounds = false) {
- assert(V->getType()->getScalarType()->isPointerTy());
+ assert(V->getType()->isPtrOrPtrVectorTy());
Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
@@ -627,8 +627,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
}
break;
}
- assert(V->getType()->getScalarType()->isPointerTy() &&
- "Unexpected operand type!");
+ assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
@@ -771,7 +770,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
// i1 sub -> xor.
- if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1))
+ if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
@@ -902,7 +901,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return X;
// i1 mul -> and.
- if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1))
+ if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
return V;
@@ -998,7 +997,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
// X % 1 -> 0
// If this is a boolean op (single-bit element type), we can't have
// division-by-zero or remainder-by-zero, so assume the divisor is 1.
- if (match(Op1, m_One()) || Ty->getScalarType()->isIntegerTy(1))
+ if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1))
return IsDiv ? Op0 : Constant::getNullValue(Ty);
return nullptr;
@@ -2251,7 +2250,7 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q) {
Type *ITy = GetCompareTy(LHS); // The return type.
Type *OpTy = LHS->getType(); // The operand type.
- if (!OpTy->getScalarType()->isIntegerTy(1))
+ if (!OpTy->isIntOrIntVectorTy(1))
return nullptr;
// A boolean compared to true/false can be simplified in 14 out of the 20
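These simplifications all swap a two-step scalar-type query for the single Type predicate that covers both the scalar and the vector case. A minimal sketch of the equivalences:

    #include "llvm/IR/Type.h"
    using namespace llvm;

    static bool isBoolLike(Type *Ty) {
      // Same as Ty->getScalarType()->isIntegerTy(1): true for i1 and for
      // vectors of i1.
      return Ty->isIntOrIntVectorTy(1);
    }

    static bool isPointerLike(Type *Ty) {
      // Same as Ty->getScalarType()->isPointerTy().
      return Ty->isPtrOrPtrVectorTy();
    }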
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index b6a9436cc1ec3..a4c3e43b4b0c2 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -456,8 +456,10 @@ updatePostorderSequenceForEdgeInsertion(
return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
}
-SmallVector<LazyCallGraph::SCC *, 1>
-LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
+bool
+LazyCallGraph::RefSCC::switchInternalEdgeToCall(
+ Node &SourceN, Node &TargetN,
+ function_ref<void(ArrayRef<SCC *> MergeSCCs)> MergeCB) {
assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!");
SmallVector<SCC *, 1> DeletedSCCs;
@@ -475,7 +477,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// we've just added more connectivity.
if (&SourceSCC == &TargetSCC) {
SourceN->setEdgeKind(TargetN, Edge::Call);
- return DeletedSCCs;
+ return false; // No new cycle.
}
// At this point we leverage the postorder list of SCCs to detect when the
@@ -488,7 +490,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
int TargetIdx = SCCIndices[&TargetSCC];
if (TargetIdx < SourceIdx) {
SourceN->setEdgeKind(TargetN, Edge::Call);
- return DeletedSCCs;
+ return false; // No new cycle.
}
// Compute the SCCs which (transitively) reach the source.
@@ -555,12 +557,16 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet,
ComputeTargetConnectedSet);
+ // Run the user's callback on the merged SCCs before we actually merge them.
+ if (MergeCB)
+ MergeCB(makeArrayRef(MergeRange.begin(), MergeRange.end()));
+
// If the merge range is empty, then adding the edge didn't actually form any
// new cycles. We're done.
if (MergeRange.begin() == MergeRange.end()) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN->setEdgeKind(TargetN, Edge::Call);
- return DeletedSCCs;
+ return false; // No new cycle.
}
#ifndef NDEBUG
@@ -596,8 +602,8 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN->setEdgeKind(TargetN, Edge::Call);
- // And we're done!
- return DeletedSCCs;
+ // And we're done, but we did form a new cycle.
+ return true;
}
void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN,
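The new signature reports cycle formation as a bool and hands the about-to-be-merged SCCs to a callback while they are still valid, which is what lets the CGSCCPassManager caller above salvage their cached analyses. A minimal usage sketch, assuming RC, SourceN, and TargetN come from surrounding graph-update code:

    #include "llvm/Analysis/LazyCallGraph.h"
    using namespace llvm;

    static bool mergeEdgeToCall(LazyCallGraph::RefSCC &RC,
                                LazyCallGraph::Node &SourceN,
                                LazyCallGraph::Node &TargetN) {
      // Returns true iff flipping the ref edge to a call edge formed a cycle.
      return RC.switchInternalEdgeToCall(
          SourceN, TargetN, [](ArrayRef<LazyCallGraph::SCC *> MergedSCCs) {
            // Inspect or invalidate each SCC about to be merged away; they
            // are still valid when the callback runs.
            (void)MergedSCCs;
          });
    }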
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 9713588537b39..ada600a69b872 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -405,7 +405,7 @@ void Lint::visitMemoryReference(Instruction &I,
Assert(!isa<UndefValue>(UnderlyingObject),
"Undefined behavior: Undef pointer dereference", &I);
Assert(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
"Unusual: All-ones pointer dereference", &I);
Assert(!isa<ConstantInt>(UnderlyingObject) ||
!cast<ConstantInt>(UnderlyingObject)->isOne(),
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index ff68810abb827..baf932432a0a4 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -131,13 +131,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {
PHINode *PN = cast<PHINode>(I);
if (ConstantInt *CI =
dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
- if (CI->isNullValue())
+ if (CI->isZero())
if (Instruction *Inc =
dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
if (Inc->getOpcode() == Instruction::Add &&
Inc->getOperand(0) == PN)
if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
- if (CI->equalsInt(1))
+ if (CI->isOne())
return PN;
}
return nullptr;
@@ -460,7 +460,7 @@ protected:
void UnloopUpdater::updateBlockParents() {
if (Unloop.getNumBlocks()) {
// Perform a post order CFG traversal of all blocks within this loop,
- // propagating the nearest loop from sucessors to predecessors.
+ // propagating the nearest loop from successors to predecessors.
LoopBlocksTraversal Traversal(DFS, LI);
for (BasicBlock *POI : Traversal) {
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index f88d54b21e1e3..7327c07499bed 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -505,6 +505,22 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
return unknown();
}
+/// When we're compiling N-bit code, and the user uses parameters that are
+/// greater than N bits (e.g. uint64_t on a 32-bit build), we can run into
+/// trouble with APInt size issues. This function handles resizing + overflow
+/// checks for us. Check and zext or trunc \p I depending on IntTyBits and
+/// I's value.
+bool ObjectSizeOffsetVisitor::CheckedZextOrTrunc(APInt &I) {
+ // More bits than we can handle. Checking the bit width isn't necessary, but
+ // it's faster than checking active bits, and should give `false` in the
+ // vast majority of cases.
+ if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
+ return false;
+ if (I.getBitWidth() != IntTyBits)
+ I = I.zextOrTrunc(IntTyBits);
+ return true;
+}
+
SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
@@ -515,8 +531,14 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
Value *ArraySize = I.getArraySize();
if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
- Size *= C->getValue().zextOrSelf(IntTyBits);
- return std::make_pair(align(Size, I.getAlignment()), Zero);
+ APInt NumElems = C->getValue();
+ if (!CheckedZextOrTrunc(NumElems))
+ return unknown();
+
+ bool Overflow;
+ Size = Size.umul_ov(NumElems, Overflow);
+ return Overflow ? unknown() : std::make_pair(align(Size, I.getAlignment()),
+ Zero);
}
return unknown();
}
@@ -561,21 +583,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
if (!Arg)
return unknown();
- // When we're compiling N-bit code, and the user uses parameters that are
- // greater than N bits (e.g. uint64_t on a 32-bit build), we can run into
- // trouble with APInt size issues. This function handles resizing + overflow
- // checks for us.
- auto CheckedZextOrTrunc = [&](APInt &I) {
- // More bits than we can handle. Checking the bit width isn't necessary, but
- // it's faster than checking active bits, and should give `false` in the
- // vast majority of cases.
- if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
- return false;
- if (I.getBitWidth() != IntTyBits)
- I = I.zextOrTrunc(IntTyBits);
- return true;
- };
-
APInt Size = Arg->getValue();
if (!CheckedZextOrTrunc(Size))
return unknown();
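The hoisted helper pairs with an overflow-checked multiply so that a huge alloca element count degrades to "unknown" instead of a silently wrapped size. A minimal sketch of the arithmetic, assuming both APInts were already resized to the same index width (which is what CheckedZextOrTrunc guarantees):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // Returns true and sets Out on success; false if Size * NumElems wraps.
    static bool checkedMul(const APInt &Size, const APInt &NumElems,
                           APInt &Out) {
      bool Overflow = false;
      Out = Size.umul_ov(NumElems, Overflow);
      return !Overflow;
    }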
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 095647e1bd20b..e9e354ebb88f9 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -266,7 +266,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// sample PGO, to enable the same inlines as the profiled optimized binary.
for (auto &I : F.getImportGUIDs())
CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness(
- CalleeInfo::HotnessType::Hot);
+ CalleeInfo::HotnessType::Critical);
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 678ad3af5e852..3fb1ab980add8 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -326,7 +326,7 @@ bool SCEV::isOne() const {
bool SCEV::isAllOnesValue() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
- return SC->getValue()->isAllOnesValue();
+ return SC->getValue()->isMinusOne();
return false;
}
@@ -2743,7 +2743,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
}
// If we are left with a constant one being multiplied, strip it off.
- if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isOne()) {
Ops.erase(Ops.begin());
--Idx;
} else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
@@ -2939,7 +2939,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
"SCEVUDivExpr operand types don't match!");
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
- if (RHSC->getValue()->equalsInt(1))
+ if (RHSC->getValue()->isOne())
return LHS; // X udiv 1 --> x
// If the denominator is zero, the result of the udiv is undefined. Don't
// try to analyze it, because the resolution chosen here may differ from
@@ -5421,9 +5421,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// For an expression like x&255 that merely masks off the high bits,
// use zext(trunc(x)) as the SCEV expression.
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
- if (CI->isNullValue())
+ if (CI->isZero())
return getSCEV(BO->RHS);
- if (CI->isAllOnesValue())
+ if (CI->isMinusOne())
return getSCEV(BO->LHS);
const APInt &A = CI->getValue();
@@ -5498,7 +5498,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case Instruction::Xor:
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
// If the RHS of xor is -1, then this is a not operation.
- if (CI->isAllOnesValue())
+ if (CI->isMinusOne())
return getNotSCEV(getSCEV(BO->LHS));
// Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
@@ -5577,7 +5577,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (CI->getValue().uge(BitWidth))
break;
- if (CI->isNullValue())
+ if (CI->isZero())
return getSCEV(BO->LHS); // shift by zero --> noop
uint64_t AShrAmt = CI->getZExtValue();
@@ -7626,7 +7626,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
// We have not yet seen any such cases.
const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
- if (!StepC || StepC->getValue()->equalsInt(0))
+ if (!StepC || StepC->getValue()->isZero())
return getCouldNotCompute();
// For positive steps (counting up until unsigned overflow):
@@ -7640,7 +7640,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// Handle unitary steps, which cannot wraparound.
// 1*N = -Start; -1*N = Start (mod 2^BW), so:
// N = Distance (as unsigned)
- if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
+ if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
APInt MaxBECount = getUnsignedRangeMax(Distance);
// When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
@@ -7696,7 +7696,7 @@ ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
// If the value is a constant, check to see if it is known to be non-zero
// already. If so, the backedge will execute zero times.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
- if (!C->getValue()->isNullValue())
+ if (!C->getValue()->isZero())
return getZero(C->getType());
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index f938a9a520650..94bbc58541a77 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>
@@ -23,6 +24,11 @@ using namespace llvm;
#define DEBUG_TYPE "tti"
+static cl::opt<bool> UseWideMemcpyLoopLowering(
+ "use-wide-memcpy-loop-lowering", cl::init(false),
+ cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."),
+ cl::Hidden);
+
namespace {
/// \brief No-op implementation of the TTI interface using the utility base
/// classes.
@@ -482,6 +488,25 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
+Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
+ Value *Length,
+ unsigned SrcAlign,
+ unsigned DestAlign) const {
+ return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
+ DestAlign);
+}
+
+void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
+ SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
+ unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
+ TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
+ SrcAlign, DestAlign);
+}
+
+bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const {
+ return UseWideMemcpyLoopLowering;
+}
+
bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
return TTIImpl->areInlineCompatible(Caller, Callee);
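The two new hooks let a target pick the operand types for the IR memcpy loop emitted in Transforms/Utils (gated by the flag above). A hypothetical target implementation might widen the copy when alignment allows; the in-tree default answers i8, i.e. a byte-wise loop:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Hypothetical sketch of a target's answer to getMemcpyLoopLoweringType.
    static Type *memcpyLoopTypeSketch(LLVMContext &Context, unsigned SrcAlign,
                                      unsigned DestAlign) {
      if (SrcAlign >= 4 && DestAlign >= 4)
        return Type::getInt32Ty(Context); // 4-byte-wide copy loop
      return Type::getInt8Ty(Context);    // fall back to byte-wise copies
    }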
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index fd6e3a643bf03..9e042da8801db 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1500,12 +1500,10 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = Known.getBitWidth();
- assert((V->getType()->isIntOrIntVectorTy() ||
- V->getType()->getScalarType()->isPointerTy()) &&
+ assert((V->getType()->isIntOrIntVectorTy(BitWidth) ||
+ V->getType()->isPtrOrPtrVectorTy()) &&
"Not integer or pointer type!");
- assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
- (!V->getType()->isIntOrIntVectorTy() ||
- V->getType()->getScalarSizeInBits() == BitWidth) &&
+ assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth &&
"V and Known should have same BitWidth");
(void)BitWidth;
@@ -1952,7 +1950,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
// Check if all incoming values are non-zero constant.
bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) {
- return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZeroValue();
+ return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero();
});
if (AllNonZeroConstants)
return true;
@@ -4393,7 +4391,7 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
}
Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
- const DataLayout &DL, bool InvertAPred,
+ const DataLayout &DL, bool LHSIsFalse,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
@@ -4402,26 +4400,51 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
return None;
Type *OpTy = LHS->getType();
- assert(OpTy->getScalarType()->isIntegerTy(1));
+ assert(OpTy->isIntOrIntVectorTy(1));
// LHS ==> RHS by definition
- if (!InvertAPred && LHS == RHS)
- return true;
+ if (LHS == RHS)
+ return !LHSIsFalse;
if (OpTy->isVectorTy())
// TODO: extending the code below to handle vectors
return None;
assert(OpTy->isIntegerTy(1) && "implied by above");
- ICmpInst::Predicate APred, BPred;
- Value *ALHS, *ARHS;
Value *BLHS, *BRHS;
+ ICmpInst::Predicate BPred;
+ // We expect the RHS to be an icmp.
+ if (!match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+ return None;
- if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) ||
- !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+ Value *ALHS, *ARHS;
+ ICmpInst::Predicate APred;
+ // The LHS can be an 'or', 'and', or 'icmp'.
+ if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS)))) {
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth == MaxDepth)
+ return None;
+ // If the result of an 'or' is false, then we know both legs of the 'or' are
+ // false. Similarly, if the result of an 'and' is true, then we know both
+ // legs of the 'and' are true.
+ if ((LHSIsFalse && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) ||
+ (!LHSIsFalse && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) {
+ if (Optional<bool> Implication = isImpliedCondition(
+ ALHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT))
+ return Implication;
+ if (Optional<bool> Implication = isImpliedCondition(
+ ARHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT))
+ return Implication;
+ return None;
+ }
return None;
+ }
+ // All of the below logic assumes both LHS and RHS are icmps.
+ assert(isa<ICmpInst>(LHS) && isa<ICmpInst>(RHS) && "Expected icmps.");
- if (InvertAPred)
+ // The rest of the logic assumes the LHS condition is true. If that's not the
+ // case, invert the predicate to make it so.
+ if (LHSIsFalse)
APred = CmpInst::getInversePredicate(APred);
// Can we infer anything when the two compares have matching operands?
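The reworked entry point now recurses through i1 'or'/'and': a false 'or' makes both legs false, and a true 'and' makes both legs true, so either leg may decide the RHS icmp. A minimal caller sketch, assuming Cond and Cmp are i1 values from surrounding code:

    #include "llvm/Analysis/ValueTracking.h"
    using namespace llvm;

    // Asks whether a known-false Cond (e.g. `or i1 %a, %b`) decides Cmp;
    // returns None when no implication can be drawn.
    static Optional<bool> impliedWhenFalse(const Value *Cond, const Value *Cmp,
                                           const DataLayout &DL) {
      return isImpliedCondition(Cond, Cmp, DL, /*LHSIsFalse=*/true);
    }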
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 0ace8fa382bc7..554d132c2ab77 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -301,7 +301,7 @@ const llvm::Value *llvm::getSplatValue(const Value *V) {
auto *InsertEltInst =
dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
- !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
+ !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
return nullptr;
return InsertEltInst->getOperand(1);
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index a49276099f194..428bb21fbf517 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -542,7 +542,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(release);
KEYWORD(acq_rel);
KEYWORD(seq_cst);
- KEYWORD(singlethread);
+ KEYWORD(syncscope);
KEYWORD(nnan);
KEYWORD(ninf);
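The new keyword replaces the bare `singlethread` qualifier with syncscope("<name>"), e.g. syncscope("singlethread"), and the parser changes below map each name to a SyncScope::ID. A minimal sketch of the matching C++ surface, assuming Ty, Ptr, and Alignment come from surrounding code:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Build an atomic load equivalent to the textual form
    //   load atomic <Ty>, <Ty>* %p syncscope("singlethread") seq_cst
    static LoadInst *makeScopedLoad(LLVMContext &Ctx, Type *Ty, Value *Ptr,
                                    unsigned Alignment) {
      SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("singlethread");
      return new LoadInst(Ty, Ptr, "", /*isVolatile=*/false, Alignment,
                          AtomicOrdering::SequentiallyConsistent, SSID);
    }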
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 9ad31125f4b8c..717eb0e00f4f4 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1919,20 +1919,42 @@ bool LLParser::parseAllocSizeArguments(unsigned &BaseSizeArg,
}
/// ParseScopeAndOrdering
-/// if isAtomic: ::= 'singlethread'? AtomicOrdering
+/// if isAtomic: ::= SyncScope? AtomicOrdering
/// else: ::=
///
/// This sets Scope and Ordering to the parsed values.
-bool LLParser::ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
+bool LLParser::ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID,
AtomicOrdering &Ordering) {
if (!isAtomic)
return false;
- Scope = CrossThread;
- if (EatIfPresent(lltok::kw_singlethread))
- Scope = SingleThread;
+ return ParseScope(SSID) || ParseOrdering(Ordering);
+}
+
+/// ParseScope
+/// ::= syncscope("singlethread" | "<target scope>")?
+///
+/// This sets synchronization scope ID to the ID of the parsed value.
+bool LLParser::ParseScope(SyncScope::ID &SSID) {
+ SSID = SyncScope::System;
+ if (EatIfPresent(lltok::kw_syncscope)) {
+ auto StartParenAt = Lex.getLoc();
+ if (!EatIfPresent(lltok::lparen))
+ return Error(StartParenAt, "Expected '(' in syncscope");
+
+ std::string SSN;
+ auto SSNAt = Lex.getLoc();
+ if (ParseStringConstant(SSN))
+ return Error(SSNAt, "Expected synchronization scope name");
- return ParseOrdering(Ordering);
+ auto EndParenAt = Lex.getLoc();
+ if (!EatIfPresent(lltok::rparen))
+ return Error(EndParenAt, "Expected ')' in syncscope");
+
+ SSID = Context.getOrInsertSyncScopeID(SSN);
+ }
+
+ return false;
}
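// Editorial sketch, not part of the diff: building a fence in a named
// scope that the parser above accepts ("agent" is an arbitrary example).
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static FenceInst *makeAgentFence(LLVMContext &Ctx) {
  // Named scopes are interned on the context; repeated lookups return the
  // same SyncScope::ID.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
  // Round-trips through the printer as: fence syncscope("agent") seq_cst
  return new FenceInst(Ctx, AtomicOrdering::SequentiallyConsistent, SSID);
}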
/// ParseOrdering
@@ -3061,7 +3083,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
} else {
assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
if (!Val0->getType()->isIntOrIntVectorTy() &&
- !Val0->getType()->getScalarType()->isPointerTy())
+ !Val0->getType()->isPtrOrPtrVectorTy())
return Error(ID.Loc, "icmp requires pointer or integer operands");
ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
}
@@ -3210,7 +3232,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (Opc == Instruction::GetElementPtr) {
if (Elts.size() == 0 ||
- !Elts[0]->getType()->getScalarType()->isPointerTy())
+ !Elts[0]->getType()->isPtrOrPtrVectorTy())
return Error(ID.Loc, "base of getelementptr must be a pointer");
Type *BaseType = Elts[0]->getType();
@@ -3226,7 +3248,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
for (Constant *Val : Indices) {
Type *ValTy = Val->getType();
- if (!ValTy->getScalarType()->isIntegerTy())
+ if (!ValTy->isIntOrIntVectorTy())
return Error(ID.Loc, "getelementptr index must be an integer");
if (ValTy->isVectorTy()) {
unsigned ValNumEl = ValTy->getVectorNumElements();
@@ -5697,7 +5719,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
} else {
assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
if (!LHS->getType()->isIntOrIntVectorTy() &&
- !LHS->getType()->getScalarType()->isPointerTy())
+ !LHS->getType()->isPtrOrPtrVectorTy())
return Error(Loc, "icmp requires integer operands");
Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
}
@@ -6100,7 +6122,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
bool AteExtraComma = false;
bool isAtomic = false;
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
- SynchronizationScope Scope = CrossThread;
+ SyncScope::ID SSID = SyncScope::System;
if (Lex.getKind() == lltok::kw_atomic) {
isAtomic = true;
@@ -6118,7 +6140,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
if (ParseType(Ty) ||
ParseToken(lltok::comma, "expected comma after load's type") ||
ParseTypeAndValue(Val, Loc, PFS) ||
- ParseScopeAndOrdering(isAtomic, Scope, Ordering) ||
+ ParseScopeAndOrdering(isAtomic, SSID, Ordering) ||
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
@@ -6134,7 +6156,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
return Error(ExplicitTypeLoc,
"explicit pointee type doesn't match operand's pointee type");
- Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, Scope);
+ Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, SSID);
return AteExtraComma ? InstExtraComma : InstNormal;
}
@@ -6149,7 +6171,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
bool AteExtraComma = false;
bool isAtomic = false;
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
- SynchronizationScope Scope = CrossThread;
+ SyncScope::ID SSID = SyncScope::System;
if (Lex.getKind() == lltok::kw_atomic) {
isAtomic = true;
@@ -6165,7 +6187,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
if (ParseTypeAndValue(Val, Loc, PFS) ||
ParseToken(lltok::comma, "expected ',' after store operand") ||
ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
- ParseScopeAndOrdering(isAtomic, Scope, Ordering) ||
+ ParseScopeAndOrdering(isAtomic, SSID, Ordering) ||
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
@@ -6181,7 +6203,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
Ordering == AtomicOrdering::AcquireRelease)
return Error(Loc, "atomic store cannot use Acquire ordering");
- Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, Scope);
+ Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, SSID);
return AteExtraComma ? InstExtraComma : InstNormal;
}
@@ -6193,7 +6215,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
bool AteExtraComma = false;
AtomicOrdering SuccessOrdering = AtomicOrdering::NotAtomic;
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
- SynchronizationScope Scope = CrossThread;
+ SyncScope::ID SSID = SyncScope::System;
bool isVolatile = false;
bool isWeak = false;
@@ -6208,7 +6230,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
ParseTypeAndValue(Cmp, CmpLoc, PFS) ||
ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") ||
ParseTypeAndValue(New, NewLoc, PFS) ||
- ParseScopeAndOrdering(true /*Always atomic*/, Scope, SuccessOrdering) ||
+ ParseScopeAndOrdering(true /*Always atomic*/, SSID, SuccessOrdering) ||
ParseOrdering(FailureOrdering))
return true;
@@ -6231,7 +6253,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
if (!New->getType()->isFirstClassType())
return Error(NewLoc, "cmpxchg operand must be a first class value");
AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst(
- Ptr, Cmp, New, SuccessOrdering, FailureOrdering, Scope);
+ Ptr, Cmp, New, SuccessOrdering, FailureOrdering, SSID);
CXI->setVolatile(isVolatile);
CXI->setWeak(isWeak);
Inst = CXI;
@@ -6245,7 +6267,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
Value *Ptr, *Val; LocTy PtrLoc, ValLoc;
bool AteExtraComma = false;
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
- SynchronizationScope Scope = CrossThread;
+ SyncScope::ID SSID = SyncScope::System;
bool isVolatile = false;
AtomicRMWInst::BinOp Operation;
@@ -6271,7 +6293,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
if (ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
ParseToken(lltok::comma, "expected ',' after atomicrmw address") ||
ParseTypeAndValue(Val, ValLoc, PFS) ||
- ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering))
+ ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering))
return true;
if (Ordering == AtomicOrdering::Unordered)
@@ -6288,7 +6310,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
" integer");
AtomicRMWInst *RMWI =
- new AtomicRMWInst(Operation, Ptr, Val, Ordering, Scope);
+ new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID);
RMWI->setVolatile(isVolatile);
Inst = RMWI;
return AteExtraComma ? InstExtraComma : InstNormal;
@@ -6298,8 +6320,8 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'fence' 'singlethread'? AtomicOrdering
int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
- SynchronizationScope Scope = CrossThread;
- if (ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering))
+ SyncScope::ID SSID = SyncScope::System;
+ if (ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering))
return true;
if (Ordering == AtomicOrdering::Unordered)
@@ -6307,7 +6329,7 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
if (Ordering == AtomicOrdering::Monotonic)
return TokError("fence cannot be monotonic");
- Inst = new FenceInst(Context, Ordering, Scope);
+ Inst = new FenceInst(Context, Ordering, SSID);
return InstNormal;
}
@@ -6349,7 +6371,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
break;
}
if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
- if (!Val->getType()->getScalarType()->isIntegerTy())
+ if (!Val->getType()->isIntOrIntVectorTy())
return Error(EltLoc, "getelementptr index must be an integer");
if (Val->getType()->isVectorTy()) {
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 4616c2e86947c..d5b059355c423 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -241,8 +241,9 @@ namespace llvm {
bool ParseOptionalCallingConv(unsigned &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
- bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
+ bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID,
AtomicOrdering &Ordering);
+ bool ParseScope(SyncScope::ID &SSID);
bool ParseOrdering(AtomicOrdering &Ordering);
bool ParseOptionalStackAlignment(unsigned &Alignment);
bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 6c8ed7da495d1..9c7a06de81b45 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -93,7 +93,7 @@ enum Kind {
kw_release,
kw_acq_rel,
kw_seq_cst,
- kw_singlethread,
+ kw_syncscope,
kw_nnan,
kw_ninf,
kw_nsz,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 1ebef31731353..2b4970a80cddb 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -513,6 +513,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
TBAAVerifier TBAAVerifyHelper;
std::vector<std::string> BundleTags;
+ SmallVector<SyncScope::ID, 8> SSIDs;
public:
BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
@@ -648,6 +649,7 @@ private:
Error parseTypeTable();
Error parseTypeTableBody();
Error parseOperandBundleTags();
+ Error parseSyncScopeNames();
Expected<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
unsigned NameIndex, Triple &TT);
@@ -668,6 +670,8 @@ private:
Error findFunctionInStream(
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
+
+ SyncScope::ID getDecodedSyncScopeID(unsigned Val);
};
/// Class to manage reading and parsing function summary index bitcode
@@ -998,14 +1002,6 @@ static AtomicOrdering getDecodedOrdering(unsigned Val) {
}
}
-static SynchronizationScope getDecodedSynchScope(unsigned Val) {
- switch (Val) {
- case bitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread;
- default: // Map unknown scopes to cross-thread.
- case bitc::SYNCHSCOPE_CROSSTHREAD: return CrossThread;
- }
-}
-
static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) {
switch (Val) {
default: // Map unknown selection kinds to any.
@@ -1745,6 +1741,44 @@ Error BitcodeReader::parseOperandBundleTags() {
}
}
+Error BitcodeReader::parseSyncScopeNames() {
+ if (Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID))
+ return error("Invalid record");
+
+ if (!SSIDs.empty())
+ return error("Invalid multiple synchronization scope names blocks");
+
+ SmallVector<uint64_t, 64> Record;
+ while (true) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ if (SSIDs.empty())
+ return error("Invalid empty synchronization scope names block");
+ return Error::success();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Synchronization scope names are implicitly mapped to synchronization
+ // scope IDs by their order.
+
+ if (Stream.readRecord(Entry.ID, Record) != bitc::SYNC_SCOPE_NAME)
+ return error("Invalid record");
+
+ SmallString<16> SSN;
+ if (convertToString(Record, 0, SSN))
+ return error("Invalid record");
+
+ SSIDs.push_back(Context.getOrInsertSyncScopeID(SSN));
+ Record.clear();
+ }
+}
+
/// Associate a value with its name from the given index in the provided record.
Expected<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
unsigned NameIndex, Triple &TT) {
@@ -3132,6 +3166,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
if (Error Err = parseOperandBundleTags())
return Err;
break;
+ case bitc::SYNC_SCOPE_NAMES_BLOCK_ID:
+ if (Error Err = parseSyncScopeNames())
+ return Err;
+ break;
}
continue;
@@ -4204,7 +4242,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
break;
}
case bitc::FUNC_CODE_INST_LOADATOMIC: {
- // LOADATOMIC: [opty, op, align, vol, ordering, synchscope]
+ // LOADATOMIC: [opty, op, align, vol, ordering, ssid]
unsigned OpNum = 0;
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
@@ -4226,12 +4264,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0)
return error("Invalid record");
- SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]);
+ SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]);
unsigned Align;
if (Error Err = parseAlignmentValue(Record[OpNum], Align))
return Err;
- I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SynchScope);
+ I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SSID);
InstructionList.push_back(I);
break;
@@ -4260,7 +4298,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_STOREATOMIC:
case bitc::FUNC_CODE_INST_STOREATOMIC_OLD: {
- // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, synchscope]
+ // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid]
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
@@ -4280,20 +4318,20 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Ordering == AtomicOrdering::Acquire ||
Ordering == AtomicOrdering::AcquireRelease)
return error("Invalid record");
- SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]);
+ SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]);
if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0)
return error("Invalid record");
unsigned Align;
if (Error Err = parseAlignmentValue(Record[OpNum], Align))
return Err;
- I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SynchScope);
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SSID);
InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_INST_CMPXCHG_OLD:
case bitc::FUNC_CODE_INST_CMPXCHG: {
- // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, synchscope,
+ // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, ssid,
// failureordering?, isweak?]
unsigned OpNum = 0;
Value *Ptr, *Cmp, *New;
@@ -4310,7 +4348,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (SuccessOrdering == AtomicOrdering::NotAtomic ||
SuccessOrdering == AtomicOrdering::Unordered)
return error("Invalid record");
- SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]);
+ SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]);
if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType()))
return Err;
@@ -4322,7 +4360,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
FailureOrdering = getDecodedOrdering(Record[OpNum + 3]);
I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering,
- SynchScope);
+ SSID);
cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
if (Record.size() < 8) {
@@ -4339,7 +4377,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
break;
}
case bitc::FUNC_CODE_INST_ATOMICRMW: {
- // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, synchscope]
+ // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid]
unsigned OpNum = 0;
Value *Ptr, *Val;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
@@ -4356,13 +4394,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Ordering == AtomicOrdering::NotAtomic ||
Ordering == AtomicOrdering::Unordered)
return error("Invalid record");
- SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]);
- I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope);
+ SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]);
+ I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID);
cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope]
+ case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, ssid]
if (2 != Record.size())
return error("Invalid record");
AtomicOrdering Ordering = getDecodedOrdering(Record[0]);
@@ -4370,8 +4408,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Ordering == AtomicOrdering::Unordered ||
Ordering == AtomicOrdering::Monotonic)
return error("Invalid record");
- SynchronizationScope SynchScope = getDecodedSynchScope(Record[1]);
- I = new FenceInst(Context, Ordering, SynchScope);
+ SyncScope::ID SSID = getDecodedSyncScopeID(Record[1]);
+ I = new FenceInst(Context, Ordering, SSID);
InstructionList.push_back(I);
break;
}
@@ -4567,6 +4605,14 @@ Error BitcodeReader::findFunctionInStream(
return Error::success();
}
+SyncScope::ID BitcodeReader::getDecodedSyncScopeID(unsigned Val) {
+ if (Val == SyncScope::SingleThread || Val == SyncScope::System)
+ return SyncScope::ID(Val);
+ if (Val >= SSIDs.size())
+ return SyncScope::System; // Map unknown synchronization scopes to system.
+ return SSIDs[Val];
+}
+
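// Editorial examples for the decoder above, assuming the builtin numbering
// SyncScope::SingleThread == 0 and SyncScope::System == 1:
//   getDecodedSyncScopeID(0) -> SyncScope::SingleThread
//   getDecodedSyncScopeID(1) -> SyncScope::System
//   getDecodedSyncScopeID(4) -> SSIDs[4], or SyncScope::System when the
//                               names block declared fewer than five scopes.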
//===----------------------------------------------------------------------===//
// GVMaterializer implementation
//===----------------------------------------------------------------------===//
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index b2b1ea6de374c..0e518d2bbc8ff 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -114,6 +114,8 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
/// True if a module hash record should be written.
bool GenerateHash;
+ SHA1 Hasher;
+
/// If non-null, when GenerateHash is true, the resulting hash is written
/// into ModHash. When GenerateHash is false, that specified value
/// is used as the hash instead of computing from the generated bitcode.
@@ -176,6 +178,8 @@ public:
private:
uint64_t bitcodeStartBit() { return BitcodeStartBit; }
+ size_t addToStrtab(StringRef Str);
+
void writeAttributeGroupTable();
void writeAttributeTable();
void writeTypeTable();
@@ -262,6 +266,7 @@ private:
const GlobalObject &GO);
void writeModuleMetadataKinds();
void writeOperandBundleTags();
+ void writeSyncScopeNames();
void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal);
void writeModuleConstants();
bool pushValueAndType(const Value *V, unsigned InstID,
@@ -312,6 +317,10 @@ private:
return VE.getValueID(VI.getValue());
}
std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
+
+ unsigned getEncodedSyncScopeID(SyncScope::ID SSID) {
+ return unsigned(SSID);
+ }
};
/// Class to manage the bitcode writing for a combined index.
@@ -481,14 +490,6 @@ static unsigned getEncodedOrdering(AtomicOrdering Ordering) {
llvm_unreachable("Invalid ordering");
}
-static unsigned getEncodedSynchScope(SynchronizationScope SynchScope) {
- switch (SynchScope) {
- case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD;
- case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD;
- }
- llvm_unreachable("Invalid synch scope");
-}
-
static void writeStringRecord(BitstreamWriter &Stream, unsigned Code,
StringRef Str, unsigned AbbrevToUse) {
SmallVector<unsigned, 64> Vals;
@@ -947,11 +948,17 @@ static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) {
llvm_unreachable("Invalid unnamed_addr");
}
+size_t ModuleBitcodeWriter::addToStrtab(StringRef Str) {
+ if (GenerateHash)
+ Hasher.update(Str);
+ return StrtabBuilder.add(Str);
+}
+
void ModuleBitcodeWriter::writeComdats() {
SmallVector<unsigned, 64> Vals;
for (const Comdat *C : VE.getComdats()) {
// COMDAT: [strtab offset, strtab size, selection_kind]
- Vals.push_back(StrtabBuilder.add(C->getName()));
+ Vals.push_back(addToStrtab(C->getName()));
Vals.push_back(C->getName().size());
Vals.push_back(getEncodedComdatSelectionKind(*C));
Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0);
@@ -1122,7 +1129,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// linkage, alignment, section, visibility, threadlocal,
// unnamed_addr, externally_initialized, dllstorageclass,
// comdat, attributes]
- Vals.push_back(StrtabBuilder.add(GV.getName()));
+ Vals.push_back(addToStrtab(GV.getName()));
Vals.push_back(GV.getName().size());
Vals.push_back(VE.getTypeID(GV.getValueType()));
Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant());
@@ -1161,7 +1168,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// linkage, paramattrs, alignment, section, visibility, gc,
// unnamed_addr, prologuedata, dllstorageclass, comdat,
// prefixdata, personalityfn]
- Vals.push_back(StrtabBuilder.add(F.getName()));
+ Vals.push_back(addToStrtab(F.getName()));
Vals.push_back(F.getName().size());
Vals.push_back(VE.getTypeID(F.getFunctionType()));
Vals.push_back(F.getCallingConv());
@@ -1191,7 +1198,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
for (const GlobalAlias &A : M.aliases()) {
// ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage,
// visibility, dllstorageclass, threadlocal, unnamed_addr]
- Vals.push_back(StrtabBuilder.add(A.getName()));
+ Vals.push_back(addToStrtab(A.getName()));
Vals.push_back(A.getName().size());
Vals.push_back(VE.getTypeID(A.getValueType()));
Vals.push_back(A.getType()->getAddressSpace());
@@ -1210,7 +1217,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
for (const GlobalIFunc &I : M.ifuncs()) {
// IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver
// val#, linkage, visibility]
- Vals.push_back(StrtabBuilder.add(I.getName()));
+ Vals.push_back(addToStrtab(I.getName()));
Vals.push_back(I.getName().size());
Vals.push_back(VE.getTypeID(I.getValueType()));
Vals.push_back(I.getType()->getAddressSpace());
@@ -2032,6 +2039,24 @@ void ModuleBitcodeWriter::writeOperandBundleTags() {
Stream.ExitBlock();
}
+void ModuleBitcodeWriter::writeSyncScopeNames() {
+ SmallVector<StringRef, 8> SSNs;
+ M.getContext().getSyncScopeNames(SSNs);
+ if (SSNs.empty())
+ return;
+
+ Stream.EnterSubblock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID, 2);
+
+ SmallVector<uint64_t, 64> Record;
+ for (auto SSN : SSNs) {
+ Record.append(SSN.begin(), SSN.end());
+ Stream.EmitRecord(bitc::SYNC_SCOPE_NAME, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
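// Editorial sketch of the round-trip contract relied on above: names are
// emitted in the context's registration order, so the record index equals
// the encoded SyncScope::ID and the reader can rebuild the same numbering.
static bool syncScopeIDsRoundTrip(LLVMContext &Ctx) {
  SmallVector<StringRef, 8> SSNs;
  Ctx.getSyncScopeNames(SSNs); // Indexed by SyncScope::ID.
  for (unsigned ID = 0, E = SSNs.size(); ID != E; ++ID)
    if (Ctx.getOrInsertSyncScopeID(SSNs[ID]) != ID)
      return false;
  return true;
}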
static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) {
if ((int64_t)V >= 0)
Vals.push_back(V << 1);
@@ -2648,7 +2673,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.push_back(cast<LoadInst>(I).isVolatile());
if (cast<LoadInst>(I).isAtomic()) {
Vals.push_back(getEncodedOrdering(cast<LoadInst>(I).getOrdering()));
- Vals.push_back(getEncodedSynchScope(cast<LoadInst>(I).getSynchScope()));
+ Vals.push_back(getEncodedSyncScopeID(cast<LoadInst>(I).getSyncScopeID()));
}
break;
case Instruction::Store:
@@ -2662,7 +2687,8 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.push_back(cast<StoreInst>(I).isVolatile());
if (cast<StoreInst>(I).isAtomic()) {
Vals.push_back(getEncodedOrdering(cast<StoreInst>(I).getOrdering()));
- Vals.push_back(getEncodedSynchScope(cast<StoreInst>(I).getSynchScope()));
+ Vals.push_back(
+ getEncodedSyncScopeID(cast<StoreInst>(I).getSyncScopeID()));
}
break;
case Instruction::AtomicCmpXchg:
@@ -2674,7 +2700,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.push_back(
getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getSuccessOrdering()));
Vals.push_back(
- getEncodedSynchScope(cast<AtomicCmpXchgInst>(I).getSynchScope()));
+ getEncodedSyncScopeID(cast<AtomicCmpXchgInst>(I).getSyncScopeID()));
Vals.push_back(
getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getFailureOrdering()));
Vals.push_back(cast<AtomicCmpXchgInst>(I).isWeak());
@@ -2688,12 +2714,12 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.push_back(cast<AtomicRMWInst>(I).isVolatile());
Vals.push_back(getEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering()));
Vals.push_back(
- getEncodedSynchScope(cast<AtomicRMWInst>(I).getSynchScope()));
+ getEncodedSyncScopeID(cast<AtomicRMWInst>(I).getSyncScopeID()));
break;
case Instruction::Fence:
Code = bitc::FUNC_CODE_INST_FENCE;
Vals.push_back(getEncodedOrdering(cast<FenceInst>(I).getOrdering()));
- Vals.push_back(getEncodedSynchScope(cast<FenceInst>(I).getSynchScope()));
+ Vals.push_back(getEncodedSyncScopeID(cast<FenceInst>(I).getSyncScopeID()));
break;
case Instruction::Call: {
const CallInst &CI = cast<CallInst>(I);
@@ -3648,7 +3674,6 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
// Emit the module's hash.
// MODULE_CODE_HASH: [5*i32]
if (GenerateHash) {
- SHA1 Hasher;
uint32_t Vals[5];
Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&(Buffer)[BlockStartPos],
Buffer.size() - BlockStartPos));
@@ -3707,6 +3732,7 @@ void ModuleBitcodeWriter::write() {
writeUseListBlock(nullptr);
writeOperandBundleTags();
+ writeSyncScopeNames();
// Emit function bodies.
DenseMap<const Function *, uint64_t> FunctionToBitcodeIndex;
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 344136b1f1956..aa9c8e94d08a3 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -361,7 +361,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
auto *NewLI = Builder.CreateLoad(NewAddr);
NewLI->setAlignment(LI->getAlignment());
NewLI->setVolatile(LI->isVolatile());
- NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+ NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
@@ -444,7 +444,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
NewSI->setAlignment(SI->getAlignment());
NewSI->setVolatile(SI->isVolatile());
- NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
+ NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
SI->eraseFromParent();
return NewSI;
@@ -801,7 +801,7 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
- CI->getFailureOrdering(), CI->getSynchScope());
+ CI->getFailureOrdering(), CI->getSyncScopeID());
NewCI->setVolatile(CI->isVolatile());
// When we're building a strong cmpxchg, we need a loop, so you
// might think we could use a weak cmpxchg inside. But, using strong
@@ -924,7 +924,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
CI->getSuccessOrdering(),
CI->getFailureOrdering(),
- CI->getSynchScope());
+ CI->getSyncScopeID());
NewCI->setVolatile(CI->isVolatile());
NewCI->setWeak(CI->isWeak());
DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index faa5f139cf7b3..b7fd45a3f6a66 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -78,6 +78,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializeRABasicPass(Registry);
+ initializeRAFastPass(Registry);
initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index b50e76f2e3ba2..b7155ac2480a7 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -4270,6 +4270,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Value *Consensus = nullptr;
unsigned NumUsesConsensus = 0;
bool IsNumUsesConsensusValid = false;
+ bool PhiSeen = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
TypePromotionTransaction TPT(RemovedInsts);
@@ -4289,6 +4290,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (Value *IncValue : P->incoming_values())
worklist.push_back(IncValue);
+ PhiSeen = true;
continue;
}
@@ -4342,9 +4344,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
TPT.commit();
// If all the instructions matched are already in this BB, don't do anything.
- if (none_of(AddrModeInsts, [&](Value *V) {
+  // If we saw a PHI node then the address is definitely not local.
+ if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) {
return IsNonLocalValue(V, MemoryInst->getParent());
- })) {
+ })) {
DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
return false;
}
@@ -4390,6 +4393,20 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
AddrMode.Scale = 0;
}
+ // It is only safe to sign extend the BaseReg if we know that the math
+ // required to create it did not overflow before we extend it. Since
+ // the original IR value was tossed in favor of a constant back when
+  // the AddrMode was created, we need to bail out gracefully if widths
+ // do not match instead of extending it.
+ //
+ // (See below for code to add the scale.)
+ if (AddrMode.Scale) {
+ Type *ScaledRegTy = AddrMode.ScaledReg->getType();
+ if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
+ cast<IntegerType>(ScaledRegTy)->getBitWidth())
+ return false;
+ }
+
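// Editorial example (not in the patch) of what the guard above rejects:
// with IntPtrTy == i64 and an i32 ScaledReg, the narrow address math may
// already have wrapped, e.g. (i32 0x7fffffff) + 1 sign-extends to
// -2147483648 rather than the expected 2147483648, so widening here could
// compute a different address. A ScaledReg wider than IntPtrTy is still
// fine: the truncation below preserves addresses modulo 2^N.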
if (AddrMode.BaseGV) {
if (ResultPtr)
return false;
@@ -4440,19 +4457,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Value *V = AddrMode.ScaledReg;
if (V->getType() == IntPtrTy) {
// done.
- } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
- cast<IntegerType>(V->getType())->getBitWidth()) {
- V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
} else {
- // It is only safe to sign extend the BaseReg if we know that the math
- // required to create it did not overflow before we extend it. Since
- // the original IR value was tossed in favor of a constant back when
- // the AddrMode was created we need to bail out gracefully if widths
- // do not match instead of extending it.
- Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
- if (I && (ResultIndex != AddrMode.BaseReg))
- I->eraseFromParent();
- return false;
+ assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth() &&
+ "We can't transform if ScaledReg is too narrow");
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
}
if (AddrMode.Scale != 1)
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 521037f9d206b..ed1bd995e60be 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -345,7 +345,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
*MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
Flags, DL->getTypeStoreSize(LI.getType()),
getMemOpAlignment(LI), AAMDNodes(), nullptr,
- LI.getSynchScope(), LI.getOrdering()));
+ LI.getSyncScopeID(), LI.getOrdering()));
return true;
}
@@ -363,7 +363,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
*MF->getMachineMemOperand(
MachinePointerInfo(SI.getPointerOperand()), Flags,
DL->getTypeStoreSize(SI.getValueOperand()->getType()),
- getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSynchScope(),
+ getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(),
SI.getOrdering()));
return true;
}
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 860fc9a4f8b61..bf427225d6a96 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -16,7 +16,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -26,6 +30,9 @@
using namespace llvm;
+InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
+ : Renderers(MaxRenderers, nullptr), MIs() {}
+
InstructionSelector::InstructionSelector() = default;
bool InstructionSelector::constrainOperandRegToRegClass(
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 84b0a0ac41579..49fb5e8f075b8 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -99,23 +99,19 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
llvm_unreachable("Unknown libcall function");
}
-LegalizerHelper::LegalizeResult llvm::replaceWithLibcall(
- MachineInstr &MI, MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
- const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args) {
+LegalizerHelper::LegalizeResult
+llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
+ const CallLowering::ArgInfo &Result,
+ ArrayRef<CallLowering::ArgInfo> Args) {
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
const char *Name = TLI.getLibcallName(Libcall);
+
MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
- MIRBuilder.setInstr(MI);
if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
MachineOperand::CreateES(Name), Result, Args))
return LegalizerHelper::UnableToLegalize;
- // We're about to remove MI, so move the insert point after it.
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(),
- std::next(MIRBuilder.getInsertPt()));
-
- MI.eraseFromParent();
return LegalizerHelper::Legalized;
}
@@ -123,10 +119,9 @@ static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
Type *OpType) {
auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
- return replaceWithLibcall(MI, MIRBuilder, Libcall,
- {MI.getOperand(0).getReg(), OpType},
- {{MI.getOperand(1).getReg(), OpType},
- {MI.getOperand(2).getReg(), OpType}});
+ return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
+ {{MI.getOperand(1).getReg(), OpType},
+ {MI.getOperand(2).getReg(), OpType}});
}
LegalizerHelper::LegalizeResult
@@ -135,6 +130,8 @@ LegalizerHelper::libcall(MachineInstr &MI) {
unsigned Size = LLTy.getSizeInBits();
auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ MIRBuilder.setInstr(MI);
+
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
@@ -143,15 +140,24 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM: {
Type *HLTy = Type::getInt32Ty(Ctx);
- return simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ if (Status != Legalized)
+ return Status;
+ break;
}
case TargetOpcode::G_FADD:
case TargetOpcode::G_FPOW:
case TargetOpcode::G_FREM: {
Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
- return simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ if (Status != Legalized)
+ return Status;
+ break;
}
}
+
+ MI.eraseFromParent();
+ return Legalized;
}
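// Editorial note on the restructuring above: createLibcall no longer
// erases MI, so the single MI.eraseFromParent() after the switch covers
// every opcode case, and other callers can emit a libcall without
// destroying the instruction they are working on.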
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 47c6214c05528..4636806c3f081 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -166,19 +166,24 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
.addGlobalAddress(GV);
}
-MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0,
+MachineInstrBuilder MachineIRBuilder::buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0,
unsigned Op1) {
assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
"invalid operand type");
assert(MRI->getType(Res) == MRI->getType(Op0) &&
MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
- return buildInstr(TargetOpcode::G_ADD)
+ return buildInstr(Opcode)
.addDef(Res)
.addUse(Op0)
.addUse(Op1);
}
+MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ return buildBinaryOp(TargetOpcode::G_ADD, Res, Op0, Op1);
+}
+
MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
unsigned Op1) {
assert(MRI->getType(Res).isPointer() &&
@@ -222,41 +227,22 @@ MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
unsigned Op1) {
- assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
- "invalid operand type");
- assert(MRI->getType(Res) == MRI->getType(Op0) &&
- MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
-
- return buildInstr(TargetOpcode::G_SUB)
- .addDef(Res)
- .addUse(Op0)
- .addUse(Op1);
+ return buildBinaryOp(TargetOpcode::G_SUB, Res, Op0, Op1);
}
MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0,
unsigned Op1) {
- assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
- "invalid operand type");
- assert(MRI->getType(Res) == MRI->getType(Op0) &&
- MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
-
- return buildInstr(TargetOpcode::G_MUL)
- .addDef(Res)
- .addUse(Op0)
- .addUse(Op1);
+ return buildBinaryOp(TargetOpcode::G_MUL, Res, Op0, Op1);
}
MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0,
unsigned Op1) {
- assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
- "invalid operand type");
- assert(MRI->getType(Res) == MRI->getType(Op0) &&
- MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+ return buildBinaryOp(TargetOpcode::G_AND, Res, Op0, Op1);
+}
- return buildInstr(TargetOpcode::G_AND)
- .addDef(Res)
- .addUse(Op0)
- .addUse(Op1);
+MachineInstrBuilder MachineIRBuilder::buildOr(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ return buildBinaryOp(TargetOpcode::G_OR, Res, Op0, Op1);
}
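// Editorial sketch of the shared helper above in use; registers are
// assumed to have been created by the caller with matching types.
static void emitMaskedSum(MachineIRBuilder &B, unsigned Dst, unsigned Tmp,
                          unsigned A, unsigned X, unsigned Mask) {
  B.buildAdd(Tmp, A, X);      // forwards to buildBinaryOp(G_ADD, ...)
  B.buildAnd(Dst, Tmp, Mask); // same shared type assertions apply
}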
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index 3746b74e0528a..f9ba4ffa6527c 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -67,7 +67,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {
}
}
-void LiveRegUnits::accumulateBackward(const MachineInstr &MI) {
+void LiveRegUnits::accumulate(const MachineInstr &MI) {
// Add defs, uses and regmask clobbers to the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index 1f1ce6e8d7250..58a655a4dee4f 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -365,6 +365,14 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
}
+static Cursor maybeLexStringConstant(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '"')
+ return None;
+ return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0,
+ ErrorCallback);
+}
+
static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
auto Range = C;
C.advance(); // Skip '%'
@@ -630,6 +638,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
return R.remaining();
+ if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback))
+ return R.remaining();
Token.reset(MIToken::Error, C.remaining());
ErrorCallback(C.location(),
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index 3e9513111bf4f..08b82e59c4fc1 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -127,7 +127,8 @@ struct MIToken {
NamedIRValue,
IRValue,
QuotedIRValue, // `<constant value>`
- SubRegisterIndex
+ SubRegisterIndex,
+ StringConstant
};
private:
@@ -168,7 +169,8 @@ public:
bool isMemoryOperandFlag() const {
return Kind == kw_volatile || Kind == kw_non_temporal ||
- Kind == kw_dereferenceable || Kind == kw_invariant;
+ Kind == kw_dereferenceable || Kind == kw_invariant ||
+ Kind == StringConstant;
}
bool is(TokenKind K) const { return Kind == K; }
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index c58d192284dd0..c68d87b15a317 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -141,6 +141,8 @@ class MIParser {
StringMap<unsigned> Names2DirectTargetFlags;
/// Maps from direct target flag names to the bitmask target flag values.
StringMap<unsigned> Names2BitmaskTargetFlags;
+ /// Maps from MMO target flag names to MMO target flag values.
+ StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags;
public:
MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
@@ -229,6 +231,7 @@ public:
bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV);
bool parseMachinePointerInfo(MachinePointerInfo &Dest);
+ bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID);
bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
@@ -318,6 +321,18 @@ private:
///
/// Return true if the name isn't a name of a bitmask target flag.
bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
+
+ void initNames2MMOTargetFlags();
+
+ /// Try to convert a name of a MachineMemOperand target flag to the
+ /// corresponding target flag.
+ ///
+ /// Return true if the name isn't a name of a target MMO flag.
+ bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
+
+ /// parseStringConstant
+ /// ::= StringConstant
+ bool parseStringConstant(std::string &Result);
};
} // end anonymous namespace
@@ -2034,7 +2049,14 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
case MIToken::kw_invariant:
Flags |= MachineMemOperand::MOInvariant;
break;
- // TODO: parse the target specific memory operand flags.
+ case MIToken::StringConstant: {
+ MachineMemOperand::Flags TF;
+ if (getMMOTargetFlag(Token.stringValue(), TF))
+ return error("use of undefined target MMO flag '" + Token.stringValue() +
+ "'");
+ Flags |= TF;
+ break;
+ }
default:
llvm_unreachable("The current token should be a memory operand flag");
}
@@ -2135,6 +2157,26 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
return false;
}
+bool MIParser::parseOptionalScope(LLVMContext &Context,
+ SyncScope::ID &SSID) {
+ SSID = SyncScope::System;
+ if (Token.is(MIToken::Identifier) && Token.stringValue() == "syncscope") {
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected '(' in syncscope");
+
+ std::string SSN;
+ if (parseStringConstant(SSN))
+ return true;
+
+ SSID = Context.getOrInsertSyncScopeID(SSN);
+ if (expectAndConsume(MIToken::rparen))
+ return error("expected ')' in syncscope");
+ }
+
+ return false;
+}
+
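// Editorial examples of memory operands accepted above; the scope token is
// optional and the operand defaults to SyncScope::System when it is absent:
//   :: (load syncscope("agent") seq_cst 4 from %ir.p)
//   :: (volatile store syncscope("singlethread") release 4 into %ir.q)
//   :: (load seq_cst 4 from %ir.p)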
bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) {
Order = AtomicOrdering::NotAtomic;
if (Token.isNot(MIToken::Identifier))
@@ -2174,12 +2216,10 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
Flags |= MachineMemOperand::MOStore;
lex();
- // Optional "singlethread" scope.
- SynchronizationScope Scope = SynchronizationScope::CrossThread;
- if (Token.is(MIToken::Identifier) && Token.stringValue() == "singlethread") {
- Scope = SynchronizationScope::SingleThread;
- lex();
- }
+ // Optional synchronization scope.
+ SyncScope::ID SSID;
+ if (parseOptionalScope(MF.getFunction()->getContext(), SSID))
+ return true;
// Up to two atomic orderings (cmpxchg provides guarantees on failure).
AtomicOrdering Order, FailureOrder;
@@ -2244,7 +2284,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (expectAndConsume(MIToken::rparen))
return true;
Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range,
- Scope, Order, FailureOrder);
+ SSID, Order, FailureOrder);
return false;
}
@@ -2457,6 +2497,35 @@ bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
return false;
}
+void MIParser::initNames2MMOTargetFlags() {
+ if (!Names2MMOTargetFlags.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableMachineMemOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2MMOTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getMMOTargetFlag(StringRef Name,
+ MachineMemOperand::Flags &Flag) {
+ initNames2MMOTargetFlags();
+ auto FlagInfo = Names2MMOTargetFlags.find(Name);
+ if (FlagInfo == Names2MMOTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+bool MIParser::parseStringConstant(std::string &Result) {
+ if (Token.isNot(MIToken::StringConstant))
+ return error("expected string constant");
+ Result = Token.stringValue();
+ lex();
+ return false;
+}
+
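// Editorial sketch of the target-side hook consumed above; the class and
// the "my-flag" name are hypothetical. Exposing a serializable name makes
// the quoted flag both parseable here and printable by MIRPrinter.
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
MyTargetInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *>
      TargetFlags[] = {{MachineMemOperand::MOTargetFlag1, "my-flag"}};
  return TargetFlags;
}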
bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
StringRef Src,
SMDiagnostic &Error) {
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index c524a9835f338..ddeacf1d1bfb1 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
@@ -139,6 +140,8 @@ class MIPrinter {
ModuleSlotTracker &MST;
const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping;
+ /// Synchronization scope names registered with LLVMContext.
+ SmallVector<StringRef, 8> SSNs;
bool canPredictBranchProbabilities(const MachineBasicBlock &MBB) const;
bool canPredictSuccessors(const MachineBasicBlock &MBB) const;
@@ -162,7 +165,9 @@ public:
void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
unsigned I, bool ShouldPrintRegisterTies,
LLT TypeToPrint, bool IsDef = false);
- void print(const MachineMemOperand &Op);
+ void print(const LLVMContext &Context, const TargetInstrInfo &TII,
+ const MachineMemOperand &Op);
+ void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID);
void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
};
@@ -731,11 +736,12 @@ void MIPrinter::print(const MachineInstr &MI) {
if (!MI.memoperands_empty()) {
OS << " :: ";
+ const LLVMContext &Context = MF->getFunction()->getContext();
bool NeedComma = false;
for (const auto *Op : MI.memoperands()) {
if (NeedComma)
OS << ", ";
- print(*Op);
+ print(Context, *TII, *Op);
NeedComma = true;
}
}
@@ -1031,9 +1037,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
}
}
-void MIPrinter::print(const MachineMemOperand &Op) {
+static const char *getTargetMMOFlagName(const TargetInstrInfo &TII,
+ unsigned TMMOFlag) {
+ auto Flags = TII.getSerializableMachineMemOperandTargetFlags();
+ for (const auto &I : Flags) {
+ if (I.first == TMMOFlag) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII,
+ const MachineMemOperand &Op) {
OS << '(';
- // TODO: Print operand's target specific flags.
if (Op.isVolatile())
OS << "volatile ";
if (Op.isNonTemporal())
@@ -1042,6 +1059,15 @@ void MIPrinter::print(const MachineMemOperand &Op) {
OS << "dereferenceable ";
if (Op.isInvariant())
OS << "invariant ";
+ if (Op.getFlags() & MachineMemOperand::MOTargetFlag1)
+ OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag1)
+ << "\" ";
+ if (Op.getFlags() & MachineMemOperand::MOTargetFlag2)
+ OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag2)
+ << "\" ";
+ if (Op.getFlags() & MachineMemOperand::MOTargetFlag3)
+ OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3)
+ << "\" ";
if (Op.isLoad())
OS << "load ";
else {
@@ -1049,8 +1075,7 @@ void MIPrinter::print(const MachineMemOperand &Op) {
OS << "store ";
}
- if (Op.getSynchScope() == SynchronizationScope::SingleThread)
- OS << "singlethread ";
+ printSyncScope(Context, Op.getSyncScopeID());
if (Op.getOrdering() != AtomicOrdering::NotAtomic)
OS << toIRString(Op.getOrdering()) << ' ';
@@ -1119,6 +1144,23 @@ void MIPrinter::print(const MachineMemOperand &Op) {
OS << ')';
}
+void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) {
+ switch (SSID) {
+ case SyncScope::System: {
+ break;
+ }
+ default: {
+ if (SSNs.empty())
+ Context.getSyncScopeNames(SSNs);
+
+ OS << "syncscope(\"";
+ PrintEscapedString(SSNs[SSID], OS);
+ OS << "\") ";
+ break;
+ }
+ }
+}
+
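// Editorial note on the printer above: SyncScope::System prints nothing,
// keeping the common case terse, so only named scopes appear, e.g.
//   (load syncscope("agent") seq_cst 4 from %ir.p)
// SSNs is filled lazily, once per MIPrinter, on the first named scope.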
static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
const TargetRegisterInfo *TRI) {
int Reg = TRI->getLLVMRegNum(DwarfReg, true);
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 2d4b95974cc64..447ad629885bf 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1917,6 +1917,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
return;
MachineBasicBlock *Top = *LoopChain.begin();
+ MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
+
+  // If ExitingBB is already the last one in the chain then there is nothing
+  // to do.
+ if (Bottom == ExitingBB)
+ return;
+
bool ViableTopFallthrough = false;
for (MachineBasicBlock *Pred : Top->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
@@ -1931,7 +1937,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
// bottom is a viable exiting block. If so, bail out as rotating will
// introduce an unnecessary branch.
if (ViableTopFallthrough) {
- MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
for (MachineBasicBlock *Succ : Bottom->successors()) {
BlockChain *SuccChain = BlockToChain[Succ];
if (!LoopBlockSet.count(Succ) &&
@@ -1944,6 +1949,36 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
if (ExitIt == LoopChain.end())
return;
+ // Rotating a loop exit to the bottom when there is a fallthrough to top
+ // trades the entry fallthrough for an exit fallthrough.
+ // If there is no bottom->top edge, but the chosen exit block does have
+ // a fallthrough, we break that fallthrough for nothing in return.
+
+  // Let's consider an example. We have built a chain of basic blocks
+  // B1, B2, ..., Bn, where Bk is ExitingBB, the chosen exit block.
+  // By doing a rotation we get
+  // Bk+1, ..., Bn, B1, ..., Bk
+  // The broken fallthrough into B1 is compensated by the new exit
+  // fallthrough from Bk. If we had a fallthrough Bk -> Bk+1, it is broken
+  // now and might only be compensated by a fallthrough Bn -> B1.
+  // So we have a condition for avoiding the creation of an extra branch by
+  // loop rotation. All of the below must be true to skip the rotation:
+  //   There is a fallthrough to the top (B1).
+  //   There was a fallthrough from the chosen exit block (Bk) to the next
+  //   one (Bk+1).
+  //   There is no fallthrough from the bottom (Bn) to the top (B1).
+  // Note that there is no exit fallthrough from Bn because we checked that
+  // above.
+ if (ViableTopFallthrough) {
+ assert(std::next(ExitIt) != LoopChain.end() &&
+ "Exit should not be last BB");
+ MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
+ if (ExitingBB->isSuccessor(NextBlockInChain))
+ if (!Bottom->isSuccessor(Top))
+ return;
+ }
+
+ DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB)
+ << " at bottom\n");
std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
}
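// Editorial restatement of the guard above as a predicate; names mirror
// the surrounding code and the function is purely illustrative.
static bool rotationAddsBranch(bool ViableTopFallthrough,
                               bool ExitFallsIntoNext,
                               bool BottomFallsToTop) {
  // Rotation breaks the Bk -> Bk+1 fallthrough; it only pays for itself
  // when a new Bn -> B1 fallthrough appears in exchange.
  return ViableTopFallthrough && ExitFallsIntoNext && !BottomFallsToTop;
}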
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index bbdae6e1a49e5..f88e175a97762 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -305,11 +305,11 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *MachineFunction::getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
- SynchronizationScope SynchScope, AtomicOrdering Ordering,
+ SyncScope::ID SSID, AtomicOrdering Ordering,
AtomicOrdering FailureOrdering) {
return new (Allocator)
MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges,
- SynchScope, Ordering, FailureOrdering);
+ SSID, Ordering, FailureOrdering);
}
MachineMemOperand *
@@ -320,13 +320,13 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MachineMemOperand(MachinePointerInfo(MMO->getValue(),
MMO->getOffset()+Offset),
MMO->getFlags(), Size, MMO->getBaseAlignment(),
- AAMDNodes(), nullptr, MMO->getSynchScope(),
+ AAMDNodes(), nullptr, MMO->getSyncScopeID(),
MMO->getOrdering(), MMO->getFailureOrdering());
return new (Allocator)
MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
MMO->getOffset()+Offset),
MMO->getFlags(), Size, MMO->getBaseAlignment(),
- AAMDNodes(), nullptr, MMO->getSynchScope(),
+ AAMDNodes(), nullptr, MMO->getSyncScopeID(),
MMO->getOrdering(), MMO->getFailureOrdering());
}
@@ -359,7 +359,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
(*I)->getFlags() & ~MachineMemOperand::MOStore,
(*I)->getSize(), (*I)->getBaseAlignment(),
(*I)->getAAInfo(), nullptr,
- (*I)->getSynchScope(), (*I)->getOrdering(),
+ (*I)->getSyncScopeID(), (*I)->getOrdering(),
(*I)->getFailureOrdering());
Result[Index] = JustLoad;
}
@@ -393,7 +393,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
(*I)->getFlags() & ~MachineMemOperand::MOLoad,
(*I)->getSize(), (*I)->getBaseAlignment(),
(*I)->getAAInfo(), nullptr,
- (*I)->getSynchScope(), (*I)->getOrdering(),
+ (*I)->getSyncScopeID(), (*I)->getOrdering(),
(*I)->getFailureOrdering());
Result[Index] = JustStore;
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 81c6dace92e04..afea5575a3ae5 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -447,6 +447,14 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
SmallString<16> Str;
getFPImm()->getValueAPF().toString(Str);
OS << "quad " << Str;
+ } else if (getFPImm()->getType()->isX86_FP80Ty()) {
+ APFloat APF = getFPImm()->getValueAPF();
+ OS << "x86_fp80 0xK";
+ APInt API = APF.bitcastToAPInt();
+ OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
+ /*Upper=*/true);
+ OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+ /*Upper=*/true);
} else {
OS << getFPImm()->getValueAPF().convertToDouble();
}
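// Editorial example of the new branch above: an x86_fp80 immediate prints
// as 0xK followed by 20 uppercase hex digits, e.g. long double 1.0 is
//   x86_fp80 0xK3FFF8000000000000000
// (16 sign/exponent bits, then the 64-bit significand including the
// explicit integer bit).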
@@ -606,7 +614,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
uint64_t s, unsigned int a,
const AAMDNodes &AAInfo,
const MDNode *Ranges,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
AtomicOrdering Ordering,
AtomicOrdering FailureOrdering)
: PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
@@ -617,8 +625,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
- AtomicInfo.SynchScope = static_cast<unsigned>(SynchScope);
- assert(getSynchScope() == SynchScope && "Value truncated");
+ AtomicInfo.SSID = static_cast<unsigned>(SSID);
+ assert(getSyncScopeID() == SSID && "Value truncated");
AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
assert(getOrdering() == Ordering && "Value truncated");
AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
@@ -744,6 +752,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
OS << "(dereferenceable)";
if (isInvariant())
OS << "(invariant)";
+ if (getFlags() & MOTargetFlag1)
+ OS << "(flag1)";
+ if (getFlags() & MOTargetFlag2)
+ OS << "(flag2)";
+ if (getFlags() & MOTargetFlag3)
+ OS << "(flag3)";
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index e65c256c1bb5a..fcb544806dda0 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -985,6 +985,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("Operand should be tied", MO, MONum);
else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
report("Tied def doesn't match MCInstrDesc", MO, MONum);
+ else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) {
+ const MachineOperand &MOTied = MI->getOperand(TiedTo);
+ if (!MOTied.isReg())
+ report("Tied counterpart must be a register", &MOTied, TiedTo);
+ else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) &&
+ MO->getReg() != MOTied.getReg())
+ report("Tied physical registers must match.", &MOTied, TiedTo);
+ }
} else if (MO->isReg() && MO->isTied())
report("Explicit operand should not be tied", MO, MONum);
} else {
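
Tied operands encode two-address constraints: the use must end up in the same register as its def. The new verifier branch covers the case where both sides are already physical registers. A reduced sketch of the invariant being checked (the helper name is illustrative, not verifier API):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// True when a tied use/def pair is still satisfiable: if both sides name
// physical registers, they must already be the same register.
bool tiedPairOK(const MachineOperand &Use, const MachineOperand &Def) {
  if (!Use.isReg() || !Def.isReg())
    return false;
  if (TargetRegisterInfo::isPhysicalRegister(Use.getReg()) &&
      TargetRegisterInfo::isPhysicalRegister(Def.getReg()))
    return Use.getReg() == Def.getReg();
  return true; // a virtual side can still be coalesced by two-address lowering
}
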
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 5e279b065bbda..633a853b2c748 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -24,7 +24,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
STATISTIC(NumFused, "Number of instr pairs fused");
diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
index 425a59dc03752..4a50d895340a5 100644
--- a/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -23,7 +23,7 @@
/// This pass traverses all the instructions in a program in top-down order.
/// In contrast to the instruction scheduling passes, this pass never resets
/// the hazard recognizer to ensure it can correctly handle noop hazards at
-/// the begining of blocks.
+/// the beginning of blocks.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index c606b7b833104..d5538be4bba25 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -203,6 +203,8 @@ namespace {
char RAFast::ID = 0;
}
+INITIALIZE_PASS(RAFast, "regallocfast", "Fast Register Allocator", false, false)
+
/// getStackSpaceFor - This allocates space for the specified virtual register
/// to be held on the stack.
int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
@@ -244,8 +246,15 @@ void RAFast::addKillFlag(const LiveReg &LR) {
if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
if (MO.getReg() == LR.PhysReg)
MO.setIsKill();
- else
- LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true);
+      // else, don't do anything: we are probably redefining a
+      // subreg of this register and given we don't track which
+      // lanes are actually dead, we cannot insert a kill flag here.
+      // Otherwise we may end up in a situation like this:
+      // ... = (MO) physreg:sub1, physreg <implicit-use, kill>
+      // ... <== Here we would allow a later pass to reuse physreg:sub1,
+      //         which is potentially wrong.
+ // LR:sub0 = ...
+ // ... = LR.sub1 <== This is going to use physreg:sub1
}
}
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 9562652556acb..020e81eca2dd2 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -2458,7 +2458,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
do {
Reg = RecoloringCandidates.pop_back_val();
- // We cannot recolor physcal register.
+    // We cannot recolor physical registers.
if (TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index e3baff4be4bcf..9778103575fab 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -924,5 +924,3 @@ FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
return createPBQPRegisterAllocator();
}
-
-#undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index ff9bca092dbe5..a67d07b36474a 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1227,6 +1227,34 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SR->createDeadDef(DefIndex, Alloc);
}
}
+
+  // Make sure that the subrange for the resultant undef is removed.
+ // For example:
+ // vreg1:sub1<def,read-undef> = LOAD CONSTANT 1
+ // vreg2<def> = COPY vreg1
+ // ==>
+ // vreg2:sub1<def, read-undef> = LOAD CONSTANT 1
+ // ; Correct but need to remove the subrange for vreg2:sub0
+ // ; as it is now undef
+ if (NewIdx != 0 && DstInt.hasSubRanges()) {
+ // The affected subregister segments can be removed.
+ SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
+ LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
+ bool UpdatedSubRanges = false;
+ for (LiveInterval::SubRange &SR : DstInt.subranges()) {
+ if ((SR.LaneMask & DstMask).none()) {
+ DEBUG(dbgs() << "Removing undefined SubRange "
+ << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
+          // VNI is in ValNo - remove any segments in this SubRange that
+          // have this ValNo.
+ if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
+ SR.removeValNo(RmValNo);
+ UpdatedSubRanges = true;
+ }
+ }
+ }
+ if (UpdatedSubRanges)
+ DstInt.removeEmptySubRanges();
+ }
} else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
// The New instruction may be defining a sub-register of what's actually
// been asked for. If so it must implicitly define the whole thing.
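
The test `(SR.LaneMask & DstMask).none()` does the interesting work above: each subrange carries a lane mask describing which subregister lanes it covers, and a subrange disjoint from the lanes the rematerialized def writes is now undef at that point. A toy illustration with made-up lane-mask values (real masks are target-dependent):

#include "llvm/MC/LaneBitmask.h"
#include <cassert>
using namespace llvm;

void laneMaskExample() {
  LaneBitmask DstMask(0x2); // lanes written by the remat'd def of sub1
  LaneBitmask Sub0(0x1);    // subrange covering sub0
  LaneBitmask Sub1(0x2);    // subrange covering sub1
  assert((Sub0 & DstMask).none());  // sub0 untouched -> its value is undef
  assert(!(Sub1 & DstMask).none()); // sub1 is defined -> keep its segments
}
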
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 05e641d9489d9..fc5105aadbffd 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -375,7 +375,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
static std::pair<MCPhysReg, MachineBasicBlock::iterator>
findSurvivorBackwards(const MachineRegisterInfo &MRI,
MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
- const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder) {
+ const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder,
+ bool RestoreAfter) {
bool FoundTo = false;
MCPhysReg Survivor = 0;
MachineBasicBlock::iterator Pos;
@@ -388,7 +389,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI,
for (MachineBasicBlock::iterator I = From;; --I) {
const MachineInstr &MI = *I;
- Used.accumulateBackward(MI);
+ Used.accumulate(MI);
if (I == To) {
// See if one of the registers in RC wasn't used so far.
@@ -401,6 +402,11 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI,
// the register which is not defined/used for the longest time.
FoundTo = true;
Pos = To;
+      // Note: It was fine so far to start our search at From; however, now
+      // that we have to spill and can only place the restore after From, we
+      // add the regs used/defed by std::next(From) to the set as well.
+ if (RestoreAfter)
+ Used.accumulate(*std::next(From));
}
if (FoundTo) {
if (Survivor == 0 || !Used.available(Survivor)) {
@@ -575,7 +581,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator UseMI;
ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
- findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder);
+ findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder,
+ RestoreAfter);
MCPhysReg Reg = P.first;
MachineBasicBlock::iterator SpillBefore = P.second;
assert(Reg != 0 && "No register left to scavenge!");
@@ -626,7 +633,7 @@ static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
assert(RealDef != nullptr && "Must have at least 1 Def");
#endif
- // We should only have one definition of the register. However to accomodate
+ // We should only have one definition of the register. However to accommodate
// the requirements of two address code we also allow definitions in
// subsequent instructions provided they also read the register. That way
// we get a single contiguous lifetime.
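
Two things change in the scavenger search above: accumulateBackward becomes the direction-neutral LiveRegUnits::accumulate, which simply adds every register used or defined by an instruction and leaves the walk order to the caller, and the new RestoreAfter flag also accounts for the instruction after From, where the spill restore will land. A usage sketch of the accumulate primitive; the helper name is illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

// Was Reg left untouched (neither used nor defined, and not live-out)
// anywhere in MBB?
bool untouchedInBlock(MCPhysReg Reg, const MachineBasicBlock &MBB,
                      const TargetRegisterInfo &TRI) {
  LiveRegUnits Used(TRI);
  Used.addLiveOuts(MBB);
  for (const MachineInstr &MI : reverse(MBB))
    Used.accumulate(MI); // adds regs used *or* defined by MI; direction-free
  return Used.available(Reg);
}
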
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 3cd270cec3a6d..5e95f760aaa24 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -67,6 +67,41 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
return &TII->get(Node->getMachineOpcode());
}
+LLVM_DUMP_METHOD
+raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ switch (getKind()) {
+ case Data: OS << "Data"; break;
+ case Anti: OS << "Anti"; break;
+ case Output: OS << "Out "; break;
+ case Order: OS << "Ord "; break;
+ }
+
+ switch (getKind()) {
+ case Data:
+ OS << " Latency=" << getLatency();
+ if (TRI && isAssignedRegDep())
+ OS << " Reg=" << PrintReg(getReg(), TRI);
+ break;
+ case Anti:
+ case Output:
+ OS << " Latency=" << getLatency();
+ break;
+ case Order:
+ OS << " Latency=" << getLatency();
+ switch(Contents.OrdKind) {
+ case Barrier: OS << " Barrier"; break;
+ case MayAliasMem:
+ case MustAliasMem: OS << " Memory"; break;
+ case Artificial: OS << " Artificial"; break;
+ case Weak: OS << " Weak"; break;
+ case Cluster: OS << " Cluster"; break;
+ }
+ break;
+ }
+
+ return OS;
+}
+
bool SUnit::addPred(const SDep &D, bool Required) {
// If this node already has this dependence, don't add a redundant one.
for (SDep &PredDep : Preds) {
@@ -302,16 +337,24 @@ void SUnit::biasCriticalPath() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
-void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const {
- if (this == &DAG->ExitSU)
- OS << "ExitSU";
- else if (this == &DAG->EntrySU)
+raw_ostream &SUnit::print(raw_ostream &OS,
+ const SUnit *Entry, const SUnit *Exit) const {
+ if (this == Entry)
OS << "EntrySU";
+ else if (this == Exit)
+ OS << "ExitSU";
else
OS << "SU(" << NodeNum << ")";
+ return OS;
+}
+
+LLVM_DUMP_METHOD
+raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const {
+ return print(OS, &G->EntrySU, &G->ExitSU);
}
-LLVM_DUMP_METHOD void SUnit::dump(const ScheduleDAG *G) const {
+LLVM_DUMP_METHOD
+void SUnit::dump(const ScheduleDAG *G) const {
print(dbgs(), G);
dbgs() << ": ";
G->dumpNode(this);
@@ -333,40 +376,18 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
if (Preds.size() != 0) {
dbgs() << " Predecessors:\n";
- for (const SDep &SuccDep : Preds) {
- dbgs() << " ";
- switch (SuccDep.getKind()) {
- case SDep::Data: dbgs() << "data "; break;
- case SDep::Anti: dbgs() << "anti "; break;
- case SDep::Output: dbgs() << "out "; break;
- case SDep::Order: dbgs() << "ord "; break;
- }
- SuccDep.getSUnit()->print(dbgs(), G);
- if (SuccDep.isArtificial())
- dbgs() << " *";
- dbgs() << ": Latency=" << SuccDep.getLatency();
- if (SuccDep.isAssignedRegDep())
- dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI);
- dbgs() << "\n";
+ for (const SDep &Dep : Preds) {
+ dbgs() << " ";
+ Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
+ Dep.print(dbgs(), G->TRI); dbgs() << '\n';
}
}
if (Succs.size() != 0) {
dbgs() << " Successors:\n";
- for (const SDep &SuccDep : Succs) {
- dbgs() << " ";
- switch (SuccDep.getKind()) {
- case SDep::Data: dbgs() << "data "; break;
- case SDep::Anti: dbgs() << "anti "; break;
- case SDep::Output: dbgs() << "out "; break;
- case SDep::Order: dbgs() << "ord "; break;
- }
- SuccDep.getSUnit()->print(dbgs(), G);
- if (SuccDep.isArtificial())
- dbgs() << " *";
- dbgs() << ": Latency=" << SuccDep.getLatency();
- if (SuccDep.isAssignedRegDep())
- dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI);
- dbgs() << "\n";
+ for (const SDep &Dep : Succs) {
+ dbgs() << " ";
+ Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
+ Dep.print(dbgs(), G->TRI); dbgs() << '\n';
}
}
}
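
The dump refactoring collapses two copies of the same switch into SDep::print, and the two-argument SUnit::print no longer needs a whole ScheduleDAG just to recognize the entry/exit units. A caller-side sketch, assuming an SUnit *SU and ScheduleDAG *G in scope; the output strings in the comments are illustrative:

for (const SDep &Dep : SU->Preds) {
  dbgs() << "  ";
  Dep.getSUnit()->print(dbgs(), G); // e.g. "SU(4)" or "EntrySU"
  dbgs() << ": ";
  Dep.print(dbgs(), G->TRI);        // e.g. "Data Latency=1 Reg=%R0"
  dbgs() << '\n';
}
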
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 0f70b0e9ca077..ccd937950a743 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -63,7 +63,7 @@
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d901af7276860..71382c18fdf9d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -400,6 +400,7 @@ namespace {
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
+ SDValue reduceBuildVecToTrunc(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
SDValue VecIn2, unsigned LeftIdx);
@@ -5267,14 +5268,40 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
}
SDValue DAGCombiner::visitRotate(SDNode *N) {
+ SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // fold (rot x, 0) -> x
+ if (isNullConstantOrNullSplatConstant(N1))
+ return N0;
+
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
- if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
- N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
- if (SDValue NewOp1 =
- distributeTruncateThroughAnd(N->getOperand(1).getNode()))
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- N->getOperand(0), NewOp1);
- }
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
+ }
+
+ unsigned NextOp = N0.getOpcode();
+ // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
+ if (NextOp == ISD::ROTL || NextOp == ISD::ROTR)
+ if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (SDNode *C2 =
+ DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ bool SameSide = (N->getOpcode() == NextOp);
+ unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
+ if (SDValue CombinedShift =
+ DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) {
+ unsigned Bitsize = VT.getScalarSizeInBits();
+ SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT);
+ SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+ ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode());
+ return DAG.getNode(
+ N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm);
+ }
+ }
return SDValue();
}
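
The nested-rotate fold is easiest to see with concrete constants: rotates in the same direction add their amounts, opposite directions subtract, and the SREM keeps the combined amount within the bitwidth. Worked i8 examples (bitsize = 8):

// rotl (rotl x, 5), 7  ->  rotl x, (5 + 7) srem 8  =  rotl x, 4
// rotl (rotr x, 3), 7  ->  rotl x, (7 - 3) srem 8  =  rotl x, 4
// rotl (rotl x, 3), 4  ->  rotl x, (3 + 4) srem 8  =  rotl x, 7
// The (rot x, 0) -> x fold added above then cleans up any pair whose
// amounts cancel exactly.
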
@@ -6091,19 +6118,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
+ SDLoc DL(N);
// fold (select C, X, X) -> X
if (N1 == N2)
return N1;
+
if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
// fold (select true, X, Y) -> X
// fold (select false, X, Y) -> Y
return !N0C->isNullValue() ? N1 : N2;
}
+
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or C, Y)
if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
+ return DAG.getNode(ISD::OR, DL, VT, N0, N2);
if (SDValue V = foldSelectOfConstants(N))
return V;
@@ -6112,22 +6142,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
+ return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
}
// fold (select C, X, 1) -> (or (not C), X)
if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
- return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
+ return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
}
// fold (select X, Y, X) -> (and X, Y)
// fold (select X, Y, 0) -> (and X, Y)
if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::AND, DL, VT, N0, N1);
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N1, N2))
- return SDValue(N, 0); // Don't revisit N.
+ return SDValue(N, 0); // Don't revisit N.
if (VT0 == MVT::i1) {
// The code in this block deals with the following 2 equivalences:
@@ -6138,27 +6168,27 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// to the right anyway if we find the inner select exists in the DAG anyway
// and we always transform to the left side if we know that we can further
// optimize the combination of the conditions.
- bool normalizeToSequence
- = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ bool normalizeToSequence =
+ TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
// select (and Cond0, Cond1), X, Y
// -> select Cond0, (select Cond1, X, Y), Y
if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+ SDValue InnerSelect =
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
if (normalizeToSequence || !InnerSelect.use_empty())
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
InnerSelect, N2);
}
// select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+ SDValue InnerSelect =
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
if (normalizeToSequence || !InnerSelect.use_empty())
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
InnerSelect);
}
@@ -6170,15 +6200,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
if (!normalizeToSequence) {
- SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
- N0, N1_0);
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
- N1_1, N2);
+ SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
}
// Otherwise see if we can optimize the "and" to a better pattern.
if (SDValue Combined = visitANDLike(N0, N1_0, N))
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
- N1_1, N2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
+ N2);
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
@@ -6189,15 +6217,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
if (!normalizeToSequence) {
- SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
- N0, N2_0);
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
- N1, N2_2);
+ SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
}
// Otherwise see if we can optimize to a better pattern.
if (SDValue Combined = visitORLike(N0, N2_0, N))
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
- N1, N2_2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
+ N2_2);
}
}
}
@@ -6208,8 +6234,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
SDValue Cond0 = N0->getOperand(0);
if (C->isOne())
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
- Cond0, N2, N1);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
}
}
}
@@ -6226,24 +6251,21 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// FIXME: Instead of testing for UnsafeFPMath, this should be checking for
// no signed zeros as well as no nans.
const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.UnsafeFPMath &&
- VT.isFloatingPoint() && N0.hasOneUse() &&
+ if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
- N0.getOperand(1), N1, N2, CC,
- TLI, DAG))
+ if (SDValue FMinMax = combineMinNumMaxNum(
+ DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
return FMinMax;
}
if ((!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
TLI.isOperationLegal(ISD::SELECT_CC, VT))
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- N1, N2, N0.getOperand(2));
- return SimplifySelect(SDLoc(N), N0, N1, N2);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), N1, N2, N0.getOperand(2));
+ return SimplifySelect(DL, N0, N1, N2);
}
return SDValue();
@@ -11045,7 +11067,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
//
// where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
- // indexed load/store and the expresion that needs to be re-written.
+ // indexed load/store and the expression that needs to be re-written.
//
// Therefore, we have:
// t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
@@ -11379,7 +11401,7 @@ namespace {
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
-/// Then, it will be rewriten into:
+/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
@@ -12694,7 +12716,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
bool IsFast = false;
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast) {
@@ -12706,7 +12728,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
FirstStoreAS, FirstStoreAlign, &IsFast) &&
IsFast) {
@@ -12723,7 +12745,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
!NoVectors) {
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
- if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) &&
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast)
@@ -12781,7 +12804,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT Ty =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast;
- if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) &&
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast)
@@ -12898,7 +12922,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
@@ -12912,7 +12936,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
@@ -12926,7 +12950,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
@@ -14228,6 +14252,73 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return Shuffles[0];
}
+// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+// operations which can be matched to a truncate.
+SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
+ // TODO: Add support for big-endian.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+ if (N->getNumOperands() < 2)
+ return SDValue();
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = N->getNumOperands();
+
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
+ // index, bail out.
+ // TODO: Allow undef elements in some cases?
+ if (any_of(N->ops(), [VT](SDValue Op) {
+ return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op.getOperand(1)) ||
+ Op.getValueType() != VT.getVectorElementType();
+ }))
+ return SDValue();
+
+ // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
+ auto GetExtractIdx = [](SDValue Extract) {
+ return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
+ };
+
+  // The first BUILD_VECTOR operand must be an extract from index zero
+ // (assuming no undef and little-endian).
+ if (GetExtractIdx(N->getOperand(0)) != 0)
+ return SDValue();
+
+ // Compute the stride from the first index.
+ int Stride = GetExtractIdx(N->getOperand(1));
+ SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);
+
+ // Proceed only if the stride and the types can be matched to a truncate.
+ if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
+ (ExtractedFromVec.getValueType().getVectorNumElements() !=
+ Stride * NumElems) ||
+ (VT.getScalarSizeInBits() * Stride > 64))
+ return SDValue();
+
+ // Check remaining operands are consistent with the computed stride.
+ for (unsigned i = 1; i != NumElems; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if ((Op.getOperand(0) != ExtractedFromVec) ||
+ (GetExtractIdx(Op) != Stride * i))
+ return SDValue();
+ }
+
+ // All checks were ok, construct the truncate.
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT NewVT = VT.getVectorVT(
+ Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);
+ EVT TruncVT =
+ VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+
+ SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec);
+ Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
+ return DAG.getBitcast(VT, Res);
+}
+
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -14270,6 +14361,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
return V;
+ if (TLI.isDesirableToCombineBuildVectorToTruncate())
+ if (SDValue V = reduceBuildVecToTrunc(N))
+ return V;
+
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
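
The reduceBuildVecToTrunc combine wired in above keys off a strided-extract pattern. An illustrative instance of what it matches and produces (little-endian, stride 2):

// %e0 = extractelt v8i16 %v, 0
// %e1 = extractelt v8i16 %v, 2
// %e2 = extractelt v8i16 %v, 4
// %e3 = extractelt v8i16 %v, 6
// build_vector %e0, %e1, %e2, %e3          ; v4i16
//   ==>
// trunc (bitcast %v to v4i32) to v4i16
// On little-endian, lane i of the v4i32 holds elements 2*i (low half) and
// 2*i+1 (high half), so the truncate keeps exactly every second element.
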
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index b235e19aaab29..b96c96f0b4df4 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -589,7 +589,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
} else
AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
- // Add the subregster being inserted
+ // Add the subregister being inserted
AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
MIB.addImm(SubIdx);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 873b2bd48f1e0..7e4bc3ccb5d39 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1991,7 +1991,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
std::move(Args))
.setTailCall(isTailCall)
.setSExtResult(isSigned)
- .setZExtResult(!isSigned);
+ .setZExtResult(!isSigned)
+ .setIsPostTypeLegalization(true);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2029,7 +2030,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
.setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
std::move(Args))
.setSExtResult(isSigned)
- .setZExtResult(!isSigned);
+ .setZExtResult(!isSigned)
+ .setIsPostTypeLegalization(true);
std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -3565,16 +3567,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
}
- BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
- DAG.getIntPtrConstant(0, dl));
- TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
- DAG.getIntPtrConstant(1, dl));
- // Ret is a node with an illegal type. Because such things are not
- // generally permitted during this phase of legalization, make sure the
- // node has no more uses. The above EXTRACT_ELEMENT nodes should have been
- // folded.
- assert(Ret->use_empty() &&
- "Unexpected uses of illegally type from expanded lib call.");
+ assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
+ "Ret value is a collection of constituent nodes holding result.");
+ BottomHalf = Ret.getOperand(0);
+ TopHalf = Ret.getOperand(1);
}
if (isSigned) {
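
This hunk pairs with the setIsPostTypeLegalization(true) calls added to ExpandLibCall above: LowerCallTo now splits an illegal-typed libcall result into legal pieces itself and hands them back as a MERGE_VALUES, so the expander reads the halves directly instead of building EXTRACT_ELEMENT nodes over an illegal value. For example, on a 64-bit target expanding an i64 multiply-with-overflow through the i128 __multi3 libcall:

// Before: Ret : i128 (illegal after type legalization)
//         BottomHalf = extract_element Ret, 0; TopHalf = extract_element Ret, 1
// After:  Ret = merge_values(lo : i64, hi : i64)
//         BottomHalf = Ret.getOperand(0);      TopHalf = Ret.getOperand(1)
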
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index c1cb5d9b5235e..eaf177d0661b3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -112,15 +112,15 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
}
- // If R is null, the sub-method took care of registering the result.
- if (R.getNode()) {
+ if (R.getNode() && R.getNode() != N) {
SetSoftenedFloat(SDValue(N, ResNo), R);
- ReplaceSoftenFloatResult(N, ResNo, R);
+ // Return true only if the node is changed, assuming that the operands
+ // are also converted when necessary.
+ return true;
}
- // Return true only if the node is changed,
- // assuming that the operands are also converted when necessary.
+
// Otherwise, return false to tell caller to scan operands.
- return R.getNode() && R.getNode() != N;
+ return false;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) {
@@ -753,12 +753,17 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
llvm_unreachable("Do not know how to soften this operator's operand!");
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
+ case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FABS: Res = SoftenFloatOp_FABS(N); break;
+ case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
+ case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break;
case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
case ISD::STORE:
@@ -791,9 +796,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
if (!isLegalInHWReg(N->getOperand(OpNo).getValueType()))
return false;
- // When the operand type can be kept in registers, SoftenFloatResult
- // will call ReplaceValueWith to replace all references and we can
- // skip softening this operand.
+
+ // When the operand type can be kept in registers there is nothing to do for
+ // the following opcodes.
switch (N->getOperand(OpNo).getOpcode()) {
case ISD::BITCAST:
case ISD::ConstantFP:
@@ -807,18 +812,12 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC:
return true;
}
- // For some opcodes, SoftenFloatResult handles all conversion of softening
- // and replacing operands, so that there is no need to soften operands
- // again, although such opcode could be scanned for other illegal operands.
+
switch (N->getOpcode()) {
- case ISD::ConstantFP:
- case ISD::CopyFromReg:
- case ISD::CopyToReg:
- case ISD::FABS:
- case ISD::FCOPYSIGN:
- case ISD::FNEG:
- case ISD::Register:
- case ISD::SELECT:
+ case ISD::ConstantFP: // Leaf node.
+ case ISD::CopyFromReg: // Operand is a register that we know to be left
+ // unchanged by SoftenFloatResult().
+ case ISD::Register: // Leaf node.
return true;
}
return false;
@@ -829,6 +828,21 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
GetSoftenedFloat(N->getOperand(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) {
+ SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
+ SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
+
+ if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
+ return SDValue();
+
+ if (N->getNumOperands() == 3)
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0);
+
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,
+ N->getOperand(3)),
+ 0);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
// If we get here, the result must be legal but the source illegal.
EVT SVT = N->getOperand(0).getValueType();
@@ -884,6 +898,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) {
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+
+ if (Op == N->getOperand(0))
+ return SDValue();
+
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
+ SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
+ SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
+
+ if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1))
+ return SDValue();
+
+ return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) {
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+
+ if (Op == N->getOperand(0))
+ return SDValue();
+
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
EVT SVT = N->getOperand(0).getValueType();
@@ -913,6 +955,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) {
+ SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
+ SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
+
+ if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
+ return SDValue();
+
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2),
+ 0);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 154af46c94464..001eed9fb8f62 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -80,6 +80,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
SDValue Res(&Node, i);
+ EVT VT = Res.getValueType();
bool Failed = false;
unsigned Mapped = 0;
@@ -129,13 +130,17 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
dbgs() << "Unprocessed value in a map!";
Failed = true;
}
- } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
+ } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) {
if (Mapped > 1) {
dbgs() << "Value with legal type was transformed!";
Failed = true;
}
} else {
- if (Mapped == 0) {
+      // If the value can be kept in HW registers, the softening machinery
+      // may leave it unchanged and not put it in any map.
+ if (Mapped == 0 &&
+ !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat &&
+ isLegalInHWReg(VT))) {
dbgs() << "Processed value not in any map!";
Failed = true;
} else if (Mapped & (Mapped - 1)) {
@@ -331,11 +336,6 @@ ScanOperands:
if (NeedsReanalyzing) {
assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
- // Remove any result values from SoftenedFloats as N will be revisited
- // again.
- for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i)
- SoftenedFloats.erase(SDValue(N, i));
-
N->setNodeId(NewNode);
// Recompute the NodeId and correct processed operands, adding the node to
// the worklist if ready.
@@ -754,8 +754,6 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
// new uses of From due to CSE. If this happens, replace the new uses of
// From with To.
} while (!From.use_empty());
-
- SoftenedFloats.erase(From);
}
void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8e999188d8e10..e102df5e913d9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -416,16 +416,6 @@ private:
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
- // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary.
- void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) {
- // When the result type can be kept in HW registers, the converted
- // NewRes node could have the same type. We can save the effort in
- // cloning every user of N in SoftenFloatOperand or other legalization functions,
- // by calling ReplaceValueWith here to update all users.
- if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo)))
- ReplaceValueWith(SDValue(N, ResNo), NewRes);
- }
-
// Convert Float Results to Integer for Non-HW-supported Operations.
bool SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
@@ -471,17 +461,23 @@ private:
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
// Return true if we can skip softening the given operand or SDNode because
- // it was soften before by SoftenFloatResult and references to the operand
- // were replaced by ReplaceValueWith.
+  // either it was softened before by SoftenFloatResult and references to the
+  // operand were replaced by ReplaceValueWith, or its value type is legal in
+  // HW registers and the operand can be left unchanged.
bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
// Convert Float Operand to Integer for Non-HW-supported Operations.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
+ SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FABS(SDNode *N);
+ SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatOp_FNEG(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftenFloatOp_SELECT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index aa69e0e2adfce..f3306151d864b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -57,7 +57,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Expand the floating point operand only if it was converted to integers.
// Otherwise, it is a legal type like f128 that can be saved in a register.
auto SoftenedOp = GetSoftenedFloat(InOp);
- if (SoftenedOp == InOp)
+ if (isLegalInHWReg(SoftenedOp.getValueType()))
break;
SplitInteger(SoftenedOp, Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ff0e609803d8a..d41054b15bbcf 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2977,7 +2977,11 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
  // Currently a SETCC or an AND/OR/XOR with two SETCCs is handled.
unsigned InMaskOpc = InMask->getOpcode();
+
+  // FIXME: This code seems to be too restrictive; we might consider
+  // generalizing it or dropping it.
assert((InMaskOpc == ISD::SETCC ||
+ ISD::isBuildVectorOfConstantSDNodes(InMask.getNode()) ||
(isLogicalMaskOp(InMaskOpc) &&
isSETCCorConvertedSETCC(InMask->getOperand(0)) &&
isSETCCorConvertedSETCC(InMask->getOperand(1)))) &&
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 98553152117d1..823e77850c4ba 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -5442,7 +5443,7 @@ SDValue SelectionDAG::getAtomicCmpSwap(
unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment, AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) {
+ AtomicOrdering FailureOrdering, SyncScope::ID SSID) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
@@ -5458,7 +5459,7 @@ SDValue SelectionDAG::getAtomicCmpSwap(
MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- AAMDNodes(), nullptr, SynchScope, SuccessOrdering,
+ AAMDNodes(), nullptr, SSID, SuccessOrdering,
FailureOrdering);
return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
@@ -5480,7 +5481,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
const Value *PtrVal, unsigned Alignment,
AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
+ SyncScope::ID SSID) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
@@ -5500,7 +5501,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
MemVT.getStoreSize(), Alignment, AAMDNodes(),
- nullptr, SynchScope, Ordering);
+ nullptr, SSID, Ordering);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
}
@@ -7630,45 +7631,13 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
SDValue Loc = LD->getOperand(1);
SDValue BaseLoc = Base->getOperand(1);
- if (Loc.getOpcode() == ISD::FrameIndex) {
- if (BaseLoc.getOpcode() != ISD::FrameIndex)
- return false;
- const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
- int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
- int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI.getObjectSize(FI);
- int BFS = MFI.getObjectSize(BFI);
- if (FS != BFS || FS != (int)Bytes) return false;
- return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
- }
-
- // Handle X + C.
- if (isBaseWithConstantOffset(Loc)) {
- int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
- if (Loc.getOperand(0) == BaseLoc) {
- // If the base location is a simple address with no offset itself, then
- // the second load's first add operand should be the base address.
- if (LocOffset == Dist * (int)Bytes)
- return true;
- } else if (isBaseWithConstantOffset(BaseLoc)) {
- // The base location itself has an offset, so subtract that value from the
- // second load's offset before comparing to distance * size.
- int64_t BOffset =
- cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
- if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
- if ((LocOffset - BOffset) == Dist * (int)Bytes)
- return true;
- }
- }
- }
- const GlobalValue *GV1 = nullptr;
- const GlobalValue *GV2 = nullptr;
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
- bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1);
- bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
- if (isGA1 && isGA2 && GV1 == GV2)
- return Offset1 == (Offset2 + Dist*Bytes);
+
+ auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
+ auto LocDecomp = BaseIndexOffset::match(Loc, *this);
+
+ int64_t Offset = 0;
+ if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
+ return (Dist * Bytes == Offset);
return false;
}
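
The three hand-written cases removed here (frame indices, base-plus-constant, global-plus-offset) are subsumed by the generic address decomposition: BaseIndexOffset::match splits each pointer into base, index, and constant displacement, and equalBaseIndex reports the relative offset whenever the bases provably coincide. A small worked instance:

// Loc     = add fi#0, 8  ->  { Base = fi#0, Offset = 8 }
// BaseLoc = fi#0         ->  { Base = fi#0, Offset = 0 }
// equalBaseIndex(...) sets Offset = 8, and with Bytes = 4, Dist = 2 the
// loads are consecutive because Dist * Bytes == 8.
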
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 4e899ae6668e7..0d69441ebb7f7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -37,13 +37,13 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- // Match non-equal FrameIndexes - a FrameIndex stemming from an
- // alloca will not have it's ObjectOffset set until post-DAG and
- // as such we must assume the two framesIndices are incomparable.
+  // Match non-equal FrameIndexes - if both frame indices are fixed,
+  // we know their relative offsets and can compare them. Otherwise
+  // we must be conservative.
if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base))
- if (!MFI.getObjectAllocation(A->getIndex()) &&
- !MFI.getObjectAllocation(B->getIndex())) {
+ if (MFI.isFixedObjectIndex(A->getIndex()) &&
+ MFI.isFixedObjectIndex(B->getIndex())) {
Off += MFI.getObjectOffset(B->getIndex()) -
MFI.getObjectOffset(A->getIndex());
return true;
@@ -60,12 +60,18 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
int64_t Offset = 0;
bool IsIndexSignExt = false;
- // Consume constant adds
- while (Base->getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Base->getOperand(1))) {
- int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue();
- Offset += POffset;
- Base = Base->getOperand(0);
+ // Consume constant adds & ors with appropriate masking.
+ while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+ // Only consider ORs which act as adds.
+ if (Base->getOpcode() == ISD::OR &&
+ !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue()))
+ break;
+ Offset += C->getSExtValue();
+ Base = Base->getOperand(0);
+ continue;
+ }
+ break;
}
if (Base->getOpcode() == ISD::ADD) {
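
The OR case is sound because an OR with a constant behaves like an ADD whenever the constant's set bits are known zero in the other operand, so no carry can occur; MaskedValueIsZero supplies that proof. For example:

// %base : low 4 bits known zero (e.g. a 16-byte aligned stack slot)
// %addr = or %base, 8
// Since MaskedValueIsZero(%base, 8) holds, %addr == %base + 8, and the OR
// folds into the running Offset exactly like a constant ADD.
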
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index acf68fbbdedfc..41c3f5f235eab 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3220,7 +3220,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
-void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+void SelectionDAGBuilder::visitInsertValue(const User &I) {
+ ArrayRef<unsigned> Indices;
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
+ Indices = IV->getIndices();
+ else
+ Indices = cast<ConstantExpr>(&I)->getIndices();
+
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
Type *AggTy = I.getType();
@@ -3228,7 +3234,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
- unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> AggValueVTs;
@@ -3268,13 +3274,19 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
DAG.getVTList(AggValueVTs), Values));
}
-void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+void SelectionDAGBuilder::visitExtractValue(const User &I) {
+ ArrayRef<unsigned> Indices;
+ if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
+ Indices = EV->getIndices();
+ else
+ Indices = cast<ConstantExpr>(&I)->getIndices();
+
const Value *Op0 = I.getOperand(0);
Type *AggTy = Op0->getType();
Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
- unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValValueVTs;
@@ -3559,6 +3571,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
MMOFlags |= MachineMemOperand::MOInvariant;
if (isDereferenceable)
MMOFlags |= MachineMemOperand::MODereferenceable;
+ MMOFlags |= TLI.getMMOFlags(I);
SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
@@ -3688,6 +3701,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
MMOFlags |= MachineMemOperand::MOVolatile;
if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
MMOFlags |= MachineMemOperand::MONonTemporal;
+ MMOFlags |= TLI.getMMOFlags(I);
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
@@ -3978,7 +3992,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrder = I.getSuccessOrdering();
AtomicOrdering FailureOrder = I.getFailureOrdering();
- SynchronizationScope Scope = I.getSynchScope();
+ SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
@@ -3988,7 +4002,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
- /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope);
+ /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
SDValue OutChain = L.getValue(2);
@@ -4014,7 +4028,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
}
AtomicOrdering Order = I.getOrdering();
- SynchronizationScope Scope = I.getSynchScope();
+ SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
@@ -4025,7 +4039,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
getValue(I.getPointerOperand()),
getValue(I.getValOperand()),
I.getPointerOperand(),
- /* Alignment=*/ 0, Order, Scope);
+ /* Alignment=*/ 0, Order, SSID);
SDValue OutChain = L.getValue(1);
@@ -4040,7 +4054,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
Ops[0] = getRoot();
Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
- Ops[2] = DAG.getConstant(I.getSynchScope(), dl,
+ Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
@@ -4048,7 +4062,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Order = I.getOrdering();
- SynchronizationScope Scope = I.getSynchScope();
+ SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
@@ -4066,7 +4080,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
VT.getStoreSize(),
I.getAlignment() ? I.getAlignment() :
DAG.getEVTAlignment(VT),
- AAMDNodes(), nullptr, Scope, Order);
+ AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
@@ -4083,7 +4097,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Order = I.getOrdering();
- SynchronizationScope Scope = I.getSynchScope();
+ SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
@@ -4100,7 +4114,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
getValue(I.getPointerOperand()),
getValue(I.getValueOperand()),
I.getPointerOperand(), I.getAlignment(),
- Order, Scope);
+ Order, SSID);
DAG.setRoot(OutChain);
}
@@ -4982,6 +4996,83 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(CallResult.second);
return nullptr;
}
+ case Intrinsic::memmove_element_unordered_atomic: {
+ auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Src = getValue(MI.getRawSource());
+ SDValue Length = getValue(MI.getLength());
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = MI.getLength()->getType();
+ Entry.Node = Length;
+ Args.push_back(Entry);
+
+ uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ DAG.setRoot(CallResult.second);
+ return nullptr;
+ }
+ case Intrinsic::memset_element_unordered_atomic: {
+ auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Val = getValue(MI.getValue());
+ SDValue Length = getValue(MI.getLength());
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Ty = Type::getInt8Ty(*DAG.getContext());
+ Entry.Node = Val;
+ Args.push_back(Entry);
+
+ Entry.Ty = MI.getLength()->getType();
+ Entry.Node = Length;
+ Args.push_back(Entry);
+
+ uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ DAG.setRoot(CallResult.second);
+ return nullptr;
+ }
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
DILocalVariable *Variable = DI.getVariable();
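
Both new intrinsic cases lower to runtime calls whose names encode the element size; the RTLIB helpers only select the right symbol from the documented __llvm_*_element_unordered_atomic_N family. A sketch of the mapping, shown for memset:

// getMEMSET_ELEMENT_UNORDERED_ATOMIC(1)  -> __llvm_memset_element_unordered_atomic_1
// getMEMSET_ELEMENT_UNORDERED_ATOMIC(4)  -> __llvm_memset_element_unordered_atomic_4
// getMEMSET_ELEMENT_UNORDERED_ATOMIC(16) -> __llvm_memset_element_unordered_atomic_16
// Any other element size yields RTLIB::UNKNOWN_LIBCALL, hence the
// report_fatal_error("Unsupported element size") guards above.
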
@@ -7842,6 +7933,22 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
auto &DL = CLI.DAG.getDataLayout();
ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
+ if (CLI.IsPostTypeLegalization) {
+ // If we are lowering a libcall after legalization, split the return type.
+ SmallVector<EVT, 4> OldRetTys = std::move(RetTys);
+ SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets);
+ for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
+ EVT RetVT = OldRetTys[i];
+ uint64_t Offset = OldOffsets[i];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
+ unsigned RegisterVTSize = RegisterVT.getSizeInBits();
+ RetTys.append(NumRegs, RegisterVT);
+ for (unsigned j = 0; j != NumRegs; ++j)
+ Offsets.push_back(Offset + j * RegisterVTSize);
+ }
+ }
+
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
@@ -7924,6 +8031,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
+ // FIXME: Split arguments if CLI.IsPostTypeLegalization
Type *FinalType = Args[i].Ty;
if (Args[i].IsByVal)
FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
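[Editor's note] Both new intrinsic cases share one recipe: marshal the operands into an ArgListTy, map the element size to a runtime entry point, and defer to LowerCallTo. The size-to-symbol step in isolation, with a hypothetical helper name standing in for the RTLIB query plus the name table added below:

#include <cstdint>
#include <string>

// Hypothetical stand-in for RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC plus
// the name table in TargetLoweringBase: only element sizes 1/2/4/8/16 have
// runtime support, so anything else must be rejected before emitting a call.
static std::string atomicMemMoveLibcallName(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1: case 2: case 4: case 8: case 16:
    return "__llvm_memmove_element_unordered_atomic_" +
           std::to_string(ElementSize);
  default:
    return ""; // caller treats this as UNKNOWN_LIBCALL
  }
}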
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 431d52b4b9b9f..ac1d6aae65a52 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -38,7 +38,6 @@ class BranchInst;
class CallInst;
class DbgValueInst;
class ExtractElementInst;
-class ExtractValueInst;
class FCmpInst;
class FPExtInst;
class FPToSIInst;
@@ -53,7 +52,6 @@ class IntToPtrInst;
class IndirectBrInst;
class InvokeInst;
class InsertElementInst;
-class InsertValueInst;
class Instruction;
class LoadInst;
class MachineBasicBlock;
@@ -859,8 +857,8 @@ private:
void visitInsertElement(const User &I);
void visitShuffleVector(const User &I);
- void visitExtractValue(const ExtractValueInst &I);
- void visitInsertValue(const InsertValueInst &I);
+ void visitExtractValue(const User &I);
+ void visitInsertValue(const User &I);
void visitLandingPad(const LandingPadInst &I);
void visitGetElementPtr(const User &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index f711ca71f79fe..bdf57e8058426 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1483,7 +1483,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->selectInstruction(Inst)) {
- FastISelFailed = true;
--NumFastIselRemaining;
++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
@@ -1506,8 +1505,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
+ FastISelFailed = true;
+
// Then handle certain instructions as single-LLVM-Instruction blocks.
- if (isa<CallInst>(Inst)) {
+ // We cannot separate out GC relocates into their own blocks since we need
+ // to keep track of gc-relocates for a particular gc-statepoint. This is
+ // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
+ // visitGCRelocate.
+ if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) {
OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
Inst->getDebugLoc(), LLVMBB);
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 7886737b879c2..17a3a84ecda57 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -125,8 +125,11 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
if (!LiveBBs.insert(BB).second)
return; // already been here.
- for (BasicBlock *PredBB : predecessors(BB))
- MarkBlocksLiveIn(PredBB, LiveBBs);
+ df_iterator_default_set<BasicBlock*> Visited;
+
+ for (BasicBlock *B : inverse_depth_first_ext(BB, Visited))
+ LiveBBs.insert(B);
+
}
/// substituteLPadValues - Substitute the values returned by the landingpad
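[Editor's note] The recursive predecessor walk becomes iterative: inverse_depth_first_ext traverses the CFG along predecessor edges with its own worklist, while the external Visited set suppresses revisits on cycles. The same idiom in isolation, assuming only the standard ADT and IR headers:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"

// Collect every block from which BB is reachable (BB included) without
// recursing: the iterator keeps its own worklist, and Visited keeps the
// traversal from looping on CFG cycles.
static void collectPredecessorClosure(
    llvm::BasicBlock *BB,
    llvm::SmallPtrSetImpl<llvm::BasicBlock *> &LiveBBs) {
  llvm::df_iterator_default_set<llvm::BasicBlock *> Visited;
  for (llvm::BasicBlock *B : llvm::inverse_depth_first_ext(BB, Visited))
    LiveBBs.insert(B);
}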
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 008b984dd9616..323045fd2aaae 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -53,10 +53,10 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num];
SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB);
- SmallVector<const MachineBasicBlock *, 1> EHPadSucessors;
+ SmallVector<const MachineBasicBlock *, 1> EHPadSuccessors;
for (const MachineBasicBlock *SMBB : MBB.successors())
if (SMBB->isEHPad())
- EHPadSucessors.push_back(SMBB);
+ EHPadSuccessors.push_back(SMBB);
// Compute insert points on the first call. The pair is independent of the
// current live interval.
@@ -68,7 +68,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
LIP.first = LIS.getInstructionIndex(*FirstTerm);
// If there is a landing pad successor, also find the call instruction.
- if (EHPadSucessors.empty())
+ if (EHPadSuccessors.empty())
return LIP.first;
// There may not be a call instruction (?) in which case we ignore LPad.
LIP.second = LIP.first;
@@ -87,7 +87,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
if (!LIP.second)
return LIP.first;
- if (none_of(EHPadSucessors, [&](const MachineBasicBlock *EHPad) {
+ if (none_of(EHPadSuccessors, [&](const MachineBasicBlock *EHPad) {
return LIS.isLiveInToMBB(CurLI, EHPad);
}))
return LIP.first;
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index e9d38c10c8601..3914ee5147122 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -384,6 +384,26 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
"__llvm_memcpy_element_unordered_atomic_8";
Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] =
"__llvm_memcpy_element_unordered_atomic_16";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] =
+ "__llvm_memmove_element_unordered_atomic_1";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] =
+ "__llvm_memmove_element_unordered_atomic_2";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] =
+ "__llvm_memmove_element_unordered_atomic_4";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] =
+ "__llvm_memmove_element_unordered_atomic_8";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] =
+ "__llvm_memmove_element_unordered_atomic_16";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] =
+ "__llvm_memset_element_unordered_atomic_1";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] =
+ "__llvm_memset_element_unordered_atomic_2";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] =
+ "__llvm_memset_element_unordered_atomic_4";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] =
+ "__llvm_memset_element_unordered_atomic_8";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] =
+ "__llvm_memset_element_unordered_atomic_16";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
@@ -803,6 +823,40 @@ RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
}
}
+RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
+ switch (ElementSize) {
+ case 1:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1;
+ case 2:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2;
+ case 4:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4;
+ case 8:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
+ case 16:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+}
+
+RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
+ switch (ElementSize) {
+ case 1:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
+ case 2:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
+ case 4:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
+ case 8:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
+ case 16:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+}
+
/// InitCmpLibcallCCs - Set default comparison libcall CC.
///
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
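[Editor's note] Like the existing memcpy variant above them, the two new helpers are pure size-keyed lookups; callers must treat UNKNOWN_LIBCALL as "no runtime support for this element size". A usage sketch, assuming the LLVM headers of this tree:

#include "llvm/CodeGen/RuntimeLibcalls.h"
#include <cstdint>

// Probe whether the element-atomic memset runtime entry point exists for a
// given element size before deciding to lower to a libcall.
bool hasAtomicMemSetLibcall(uint64_t ElementSize) {
  return llvm::RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSize) !=
         llvm::RTLIB::UNKNOWN_LIBCALL;
}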
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index b9fa9b6a6ad7e..c2c02f8de03fb 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -62,6 +62,18 @@ private:
};
}
+static StringRef getSymbolKindName(SymbolKind Kind) {
+ switch (Kind) {
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ case EnumName: \
+ return #Name;
+#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def"
+ default:
+ break;
+ }
+ return "UnknownSym";
+}
+
void CVSymbolDumperImpl::printLocalVariableAddrRange(
const LocalVariableAddrRange &Range, uint32_t RelocationOffset) {
DictScope S(W, "LocalVariableAddrRange");
@@ -86,18 +98,23 @@ void CVSymbolDumperImpl::printTypeIndex(StringRef FieldName, TypeIndex TI) {
}
Error CVSymbolDumperImpl::visitSymbolBegin(CVSymbol &CVR) {
+ W.startLine() << getSymbolKindName(CVR.Type);
+ W.getOStream() << " {\n";
+ W.indent();
+ W.printEnum("Kind", unsigned(CVR.Type), getSymbolTypeNames());
return Error::success();
}
Error CVSymbolDumperImpl::visitSymbolEnd(CVSymbol &CVR) {
if (PrintRecordBytes && ObjDelegate)
ObjDelegate->printBinaryBlockWithRelocs("SymData", CVR.content());
+
+ W.unindent();
+ W.startLine() << "}\n";
return Error::success();
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) {
- DictScope S(W, "BlockStart");
-
StringRef LinkageName;
W.printHex("PtrParent", Block.Parent);
W.printHex("PtrEnd", Block.End);
@@ -113,7 +130,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) {
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) {
- DictScope S(W, "Thunk32");
W.printNumber("Parent", Thunk.Parent);
W.printNumber("End", Thunk.End);
W.printNumber("Next", Thunk.Next);
@@ -126,7 +142,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) {
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
TrampolineSym &Tramp) {
- DictScope S(W, "Trampoline");
W.printEnum("Type", uint16_t(Tramp.Type), getTrampolineNames());
W.printNumber("Size", Tramp.Size);
W.printNumber("ThunkOff", Tramp.ThunkOffset);
@@ -137,7 +152,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, SectionSym &Section) {
- DictScope S(W, "Section");
W.printNumber("SectionNumber", Section.SectionNumber);
W.printNumber("Alignment", Section.Alignment);
W.printNumber("Rva", Section.Rva);
@@ -152,7 +166,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, SectionSym &Section) {
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
CoffGroupSym &CoffGroup) {
- DictScope S(W, "COFF Group");
W.printNumber("Size", CoffGroup.Size);
W.printFlags("Characteristics", CoffGroup.Characteristics,
getImageSectionCharacteristicNames(),
@@ -165,8 +178,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
BPRelativeSym &BPRel) {
- DictScope S(W, "BPRelativeSym");
-
W.printNumber("Offset", BPRel.Offset);
printTypeIndex("Type", BPRel.Type);
W.printString("VarName", BPRel.Name);
@@ -175,16 +186,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
BuildInfoSym &BuildInfo) {
- DictScope S(W, "BuildInfo");
-
W.printNumber("BuildId", BuildInfo.BuildId);
return Error::success();
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
CallSiteInfoSym &CallSiteInfo) {
- DictScope S(W, "CallSiteInfo");
-
StringRef LinkageName;
if (ObjDelegate) {
ObjDelegate->printRelocatedField("CodeOffset",
@@ -200,8 +207,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
EnvBlockSym &EnvBlock) {
- DictScope S(W, "EnvBlock");
-
ListScope L(W, "Entries");
for (auto Entry : EnvBlock.Fields) {
W.printString(Entry);
@@ -211,7 +216,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
FileStaticSym &FileStatic) {
- DictScope S(W, "FileStatic");
printTypeIndex("Index", FileStatic.Index);
W.printNumber("ModFilenameOffset", FileStatic.ModFilenameOffset);
W.printFlags("Flags", uint16_t(FileStatic.Flags), getLocalFlagNames());
@@ -220,7 +224,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) {
- DictScope S(W, "Export");
W.printNumber("Ordinal", Export.Ordinal);
W.printFlags("Flags", uint16_t(Export.Flags), getExportSymFlagNames());
W.printString("Name", Export.Name);
@@ -229,8 +232,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) {
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Compile2Sym &Compile2) {
- DictScope S(W, "CompilerFlags2");
-
W.printEnum("Language", Compile2.getLanguage(), getSourceLanguageNames());
W.printFlags("Flags", Compile2.getFlags(), getCompileSym2FlagNames());
W.printEnum("Machine", unsigned(Compile2.Machine), getCPUTypeNames());
@@ -254,8 +255,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Compile3Sym &Compile3) {
- DictScope S(W, "CompilerFlags3");
-
W.printEnum("Language", Compile3.getLanguage(), getSourceLanguageNames());
W.printFlags("Flags", Compile3.getFlags(), getCompileSym3FlagNames());
W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames());
@@ -281,8 +280,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
ConstantSym &Constant) {
- DictScope S(W, "Constant");
-
printTypeIndex("Type", Constant.Type);
W.printNumber("Value", Constant.Value);
W.printString("Name", Constant.Name);
@@ -290,9 +287,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DataSym &Data) {
- DictScope S(W, "DataSym");
-
- W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames());
StringRef LinkageName;
if (ObjDelegate) {
ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(),
@@ -308,15 +302,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DataSym &Data) {
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR,
DefRangeFramePointerRelFullScopeSym &DefRangeFramePointerRelFullScope) {
- DictScope S(W, "DefRangeFramePointerRelFullScope");
W.printNumber("Offset", DefRangeFramePointerRelFullScope.Offset);
return Error::success();
}
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) {
- DictScope S(W, "DefRangeFramePointerRel");
-
W.printNumber("Offset", DefRangeFramePointerRel.Offset);
printLocalVariableAddrRange(DefRangeFramePointerRel.Range,
DefRangeFramePointerRel.getRelocationOffset());
@@ -326,8 +317,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) {
- DictScope S(W, "DefRangeRegisterRel");
-
W.printNumber("BaseRegister", DefRangeRegisterRel.Hdr.Register);
W.printBoolean("HasSpilledUDTMember",
DefRangeRegisterRel.hasSpilledUDTMember());
@@ -341,8 +330,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) {
- DictScope S(W, "DefRangeRegister");
-
W.printNumber("Register", DefRangeRegister.Hdr.Register);
W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName);
printLocalVariableAddrRange(DefRangeRegister.Range,
@@ -353,8 +340,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) {
- DictScope S(W, "DefRangeSubfieldRegister");
-
W.printNumber("Register", DefRangeSubfieldRegister.Hdr.Register);
W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName);
W.printNumber("OffsetInParent", DefRangeSubfieldRegister.Hdr.OffsetInParent);
@@ -366,8 +351,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, DefRangeSubfieldSym &DefRangeSubfield) {
- DictScope S(W, "DefRangeSubfield");
-
if (ObjDelegate) {
DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable();
auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program);
@@ -387,8 +370,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
DefRangeSym &DefRange) {
- DictScope S(W, "DefRange");
-
if (ObjDelegate) {
DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable();
auto ExpectedProgram = Strings.getString(DefRange.Program);
@@ -406,8 +387,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
FrameCookieSym &FrameCookie) {
- DictScope S(W, "FrameCookie");
-
StringRef LinkageName;
if (ObjDelegate) {
ObjDelegate->printRelocatedField("CodeOffset",
@@ -423,8 +402,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
FrameProcSym &FrameProc) {
- DictScope S(W, "FrameProc");
-
W.printHex("TotalFrameBytes", FrameProc.TotalFrameBytes);
W.printHex("PaddingFrameBytes", FrameProc.PaddingFrameBytes);
W.printHex("OffsetToPadding", FrameProc.OffsetToPadding);
@@ -440,8 +417,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, HeapAllocationSiteSym &HeapAllocSite) {
- DictScope S(W, "HeapAllocationSite");
-
StringRef LinkageName;
if (ObjDelegate) {
ObjDelegate->printRelocatedField("CodeOffset",
@@ -458,8 +433,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
InlineSiteSym &InlineSite) {
- DictScope S(W, "InlineSite");
-
W.printHex("PtrParent", InlineSite.Parent);
W.printHex("PtrEnd", InlineSite.End);
printTypeIndex("Inlinee", InlineSite.Inlinee);
@@ -515,7 +488,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
RegisterSym &Register) {
- DictScope S(W, "RegisterSym");
printTypeIndex("Type", Register.Index);
W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames());
W.printString("Name", Register.Name);
@@ -523,7 +495,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) {
- DictScope S(W, "PublicSym");
W.printFlags("Flags", uint32_t(Public.Flags), getPublicSymFlagNames());
W.printNumber("Seg", Public.Segment);
W.printNumber("Off", Public.Offset);
@@ -532,7 +503,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) {
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcRefSym &ProcRef) {
- DictScope S(W, "ProcRef");
W.printNumber("SumName", ProcRef.SumName);
W.printNumber("SymOffset", ProcRef.SymOffset);
W.printNumber("Mod", ProcRef.Module);
@@ -541,8 +511,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcRefSym &ProcRef) {
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) {
- DictScope S(W, "Label");
-
StringRef LinkageName;
if (ObjDelegate) {
ObjDelegate->printRelocatedField("CodeOffset", Label.getRelocationOffset(),
@@ -558,8 +526,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) {
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) {
- DictScope S(W, "Local");
-
printTypeIndex("Type", Local.Type);
W.printFlags("Flags", uint16_t(Local.Flags), getLocalFlagNames());
W.printString("VarName", Local.Name);
@@ -567,16 +533,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) {
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ObjNameSym &ObjName) {
- DictScope S(W, "ObjectName");
-
W.printHex("Signature", ObjName.Signature);
W.printString("ObjectName", ObjName.Name);
return Error::success();
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) {
- DictScope S(W, "ProcStart");
-
if (InFunctionScope)
return llvm::make_error<CodeViewError>(
"Visiting a ProcSym while inside function scope!");
@@ -584,7 +546,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) {
InFunctionScope = true;
StringRef LinkageName;
- W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames());
W.printHex("PtrParent", Proc.Parent);
W.printHex("PtrEnd", Proc.End);
W.printHex("PtrNext", Proc.Next);
@@ -607,13 +568,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) {
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
ScopeEndSym &ScopeEnd) {
- if (CVR.kind() == SymbolKind::S_END)
- DictScope S(W, "BlockEnd");
- else if (CVR.kind() == SymbolKind::S_PROC_ID_END)
- DictScope S(W, "ProcEnd");
- else if (CVR.kind() == SymbolKind::S_INLINESITE_END)
- DictScope S(W, "InlineSiteEnd");
-
InFunctionScope = false;
return Error::success();
}
@@ -627,8 +581,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CallerSym &Caller) {
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
RegRelativeSym &RegRel) {
- DictScope S(W, "RegRelativeSym");
-
W.printHex("Offset", RegRel.Offset);
printTypeIndex("Type", RegRel.Type);
W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames());
@@ -638,8 +590,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
ThreadLocalDataSym &Data) {
- DictScope S(W, "ThreadLocalDataSym");
-
StringRef LinkageName;
if (ObjDelegate) {
ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(),
@@ -653,15 +603,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) {
- DictScope S(W, "UDT");
printTypeIndex("Type", UDT.Type);
W.printString("UDTName", UDT.Name);
return Error::success();
}
Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
- DictScope S(W, "UnknownSym");
- W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames());
W.printNumber("Length", CVR.length());
return Error::success();
}
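[Editor's note] The new getSymbolKindName uses the standard .def X-macro trick: CodeViewSymbols.def expands SYMBOL_RECORD once per record kind, and the switch stamps out one case per expansion, so the banner printed by visitSymbolBegin stays in sync with the enum automatically. A generic, self-contained rendition of the pattern (hypothetical enum, with the .def contents inlined so it compiles on its own):

#include <cstdio>

// Stands in for a shared .def file; each X(...) row is one record kind.
#define MY_SYMBOL_RECORDS(X)                                                  \
  X(S_BLOCK32, 0x1103, BlockStart)                                            \
  X(S_THUNK32, 0x1102, Thunk32)

enum MySymbolKind {
#define SYMBOL_ENUM(EnumName, EnumVal, Name) EnumName = EnumVal,
  MY_SYMBOL_RECORDS(SYMBOL_ENUM)
#undef SYMBOL_ENUM
};

static const char *kindName(MySymbolKind Kind) {
  switch (Kind) {
#define SYMBOL_NAME(EnumName, EnumVal, Name)                                  \
  case EnumName:                                                              \
    return #Name;
    MY_SYMBOL_RECORDS(SYMBOL_NAME)
#undef SYMBOL_NAME
  }
  return "UnknownSym"; // mirrors the fallback in the patch
}

int main() { std::printf("%s\n", kindName(S_THUNK32)); }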
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 72cb9e2e35442..0d935c4472aef 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -382,6 +382,13 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
case SymbolKind::S_BUILDINFO:
Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags
break;
+ case SymbolKind::S_LTHREAD32:
+ case SymbolKind::S_GTHREAD32:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+ break;
+ case SymbolKind::S_FILESTATIC:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+ break;
case SymbolKind::S_LOCAL:
Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
break;
@@ -403,6 +410,10 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
case SymbolKind::S_INLINESITE:
Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee
break;
+ case SymbolKind::S_HEAPALLOCSITE:
+ // FIXME: It's not clear if this is a type or item reference.
+ Refs.push_back({TiRefKind::IndexRef, 8, 1}); // signature
+ break;
// Defranges don't have types, just registers and code offsets.
case SymbolKind::S_DEFRANGE_REGISTER:
@@ -419,6 +430,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
case SymbolKind::S_COMPILE:
case SymbolKind::S_COMPILE2:
case SymbolKind::S_COMPILE3:
+ case SymbolKind::S_ENVBLOCK:
case SymbolKind::S_BLOCK32:
case SymbolKind::S_FRAMEPROC:
break;
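[Editor's note] Each Refs entry is a (kind, offset, count) triple locating 32-bit type or item indices within the record payload. A reduced sketch of how a consumer reads them back out, assuming the offsets are byte offsets into the payload as in the symbol cases here:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

enum class TiRefKind { TypeRef, IndexRef };
struct TiReference { TiRefKind Kind; uint32_t Offset; uint32_t Count; };

// Read the referenced indices out of a record payload. Each reference names
// Count consecutive 32-bit indices beginning Offset bytes into the payload.
static std::vector<uint32_t>
collectIndices(const uint8_t *Content, size_t Size,
               const std::vector<TiReference> &Refs) {
  std::vector<uint32_t> Out;
  for (const TiReference &R : Refs)
    for (uint32_t I = 0; I < R.Count; ++I) {
      size_t Byte = R.Offset + I * sizeof(uint32_t);
      if (Byte + sizeof(uint32_t) > Size)
        break; // malformed record; stop rather than read past the end
      uint32_t TI;
      std::memcpy(&TI, Content + Byte, sizeof(TI));
      Out.push_back(TI);
    }
  return Out;
}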
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index a18d4efec07a7..495e09fbae355 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -591,10 +591,10 @@ void DWARFContext::parseCompileUnits() {
void DWARFContext::parseTypeUnits() {
if (!TUs.empty())
return;
- for (const auto &I : getTypesSections()) {
+ forEachTypesSections([&](const DWARFSection &S) {
TUs.emplace_back();
- TUs.back().parse(*this, I.second);
- }
+ TUs.back().parse(*this, S);
+ });
}
void DWARFContext::parseDWOCompileUnits() {
@@ -604,10 +604,10 @@ void DWARFContext::parseDWOCompileUnits() {
void DWARFContext::parseDWOTypeUnits() {
if (!DWOTUs.empty())
return;
- for (const auto &I : getTypesDWOSections()) {
+ forEachTypesDWOSections([&](const DWARFSection &S) {
DWOTUs.emplace_back();
- DWOTUs.back().parseDWO(*this, I.second);
- }
+ DWOTUs.back().parseDWO(*this, S);
+ });
}
DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) {
@@ -937,27 +937,23 @@ DWARFContextInMemory::DWARFContextInMemory(
: FileName(Obj.getFileName()), IsLittleEndian(Obj.isLittleEndian()),
AddressSize(Obj.getBytesInAddress()) {
for (const SectionRef &Section : Obj.sections()) {
- StringRef name;
- Section.getName(name);
+ StringRef Name;
+ Section.getName(Name);
// Skip BSS and Virtual sections, they aren't interesting.
- bool IsBSS = Section.isBSS();
- if (IsBSS)
- continue;
- bool IsVirtual = Section.isVirtual();
- if (IsVirtual)
+ if (Section.isBSS() || Section.isVirtual())
continue;
- StringRef data;
+ StringRef Data;
section_iterator RelocatedSection = Section.getRelocatedSection();
// Try to obtain an already relocated version of this section.
// Else use the unrelocated section from the object file. We'll have to
// apply relocations ourselves later.
- if (!L || !L->getLoadedSectionContents(*RelocatedSection, data))
- Section.getContents(data);
+ if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data))
+ Section.getContents(Data);
- if (auto Err = maybeDecompress(Section, name, data)) {
+ if (auto Err = maybeDecompress(Section, Name, Data)) {
ErrorPolicy EP = HandleError(
- createError("failed to decompress '" + name + "', ", std::move(Err)));
+ createError("failed to decompress '" + Name + "', ", std::move(Err)));
if (EP == ErrorPolicy::Halt)
return;
continue;
@@ -965,27 +961,27 @@ DWARFContextInMemory::DWARFContextInMemory(
// Compressed section names in GNU style start with ".z"; at this point the
// section is decompressed and we drop the compression prefix.
- name = name.substr(
- name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes.
+ Name = Name.substr(
+ Name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes.
+
+ // Map platform specific debug section names to DWARF standard section
+ // names.
+ Name = Obj.mapDebugSectionName(Name);
- if (StringRef *SectionData = MapSectionToMember(name)) {
- *SectionData = data;
- if (name == "debug_ranges") {
+ if (StringRef *SectionData = mapSectionToMember(Name)) {
+ *SectionData = Data;
+ if (Name == "debug_ranges") {
// FIXME: Use the other dwo range section when we emit it.
- RangeDWOSection.Data = data;
+ RangeDWOSection.Data = Data;
}
- } else if (name == "debug_types") {
+ } else if (Name == "debug_types") {
// Find debug_types data by section rather than name as there are
// multiple, comdat grouped, debug_types sections.
- TypesSections[Section].Data = data;
- } else if (name == "debug_types.dwo") {
- TypesDWOSections[Section].Data = data;
+ TypesSections[Section].Data = Data;
+ } else if (Name == "debug_types.dwo") {
+ TypesDWOSections[Section].Data = Data;
}
- // Map platform specific debug section names to DWARF standard section
- // names.
- name = Obj.mapDebugSectionName(name);
-
if (RelocatedSection == Obj.section_end())
continue;
@@ -1012,21 +1008,8 @@ DWARFContextInMemory::DWARFContextInMemory(
// TODO: Add support for relocations in other sections as needed.
// Record relocations for the debug_info and debug_line sections.
- RelocAddrMap *Map =
- StringSwitch<RelocAddrMap *>(RelSecName)
- .Case("debug_info", &InfoSection.Relocs)
- .Case("debug_loc", &LocSection.Relocs)
- .Case("debug_info.dwo", &InfoDWOSection.Relocs)
- .Case("debug_line", &LineSection.Relocs)
- .Case("debug_str_offsets", &StringOffsetSection.Relocs)
- .Case("debug_ranges", &RangeSection.Relocs)
- .Case("debug_addr", &AddrSection.Relocs)
- .Case("apple_names", &AppleNamesSection.Relocs)
- .Case("apple_types", &AppleTypesSection.Relocs)
- .Case("apple_namespaces", &AppleNamespacesSection.Relocs)
- .Case("apple_namespac", &AppleNamespacesSection.Relocs)
- .Case("apple_objc", &AppleObjCSection.Relocs)
- .Default(nullptr);
+ DWARFSection *Sec = mapNameToDWARFSection(RelSecName);
+ RelocAddrMap *Map = Sec ? &Sec->Relocs : nullptr;
if (!Map) {
// Find debug_types relocs by section rather than name as there are
// multiple, comdat grouped, debug_types sections.
@@ -1059,10 +1042,10 @@ DWARFContextInMemory::DWARFContextInMemory(
object::RelocVisitor V(Obj);
uint64_t Val = V.visit(Reloc.getType(), Reloc, SymInfoOrErr->Address);
if (V.error()) {
- SmallString<32> Name;
- Reloc.getTypeName(Name);
+ SmallString<32> Type;
+ Reloc.getTypeName(Type);
ErrorPolicy EP = HandleError(
- createError("failed to compute relocation: " + Name + ", ",
+ createError("failed to compute relocation: " + Type + ", ",
errorCodeToError(object_error::parse_failed)));
if (EP == ErrorPolicy::Halt)
return;
@@ -1079,40 +1062,47 @@ DWARFContextInMemory::DWARFContextInMemory(
bool isLittleEndian)
: IsLittleEndian(isLittleEndian), AddressSize(AddrSize) {
for (const auto &SecIt : Sections) {
- if (StringRef *SectionData = MapSectionToMember(SecIt.first()))
+ if (StringRef *SectionData = mapSectionToMember(SecIt.first()))
*SectionData = SecIt.second->getBuffer();
}
}
-StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) {
+DWARFSection *DWARFContextInMemory::mapNameToDWARFSection(StringRef Name) {
+ return StringSwitch<DWARFSection *>(Name)
+ .Case("debug_info", &InfoSection)
+ .Case("debug_loc", &LocSection)
+ .Case("debug_line", &LineSection)
+ .Case("debug_str_offsets", &StringOffsetSection)
+ .Case("debug_ranges", &RangeSection)
+ .Case("debug_info.dwo", &InfoDWOSection)
+ .Case("debug_loc.dwo", &LocDWOSection)
+ .Case("debug_line.dwo", &LineDWOSection)
+ .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
+ .Case("debug_addr", &AddrSection)
+ .Case("apple_names", &AppleNamesSection)
+ .Case("apple_types", &AppleTypesSection)
+ .Case("apple_namespaces", &AppleNamespacesSection)
+ .Case("apple_namespac", &AppleNamespacesSection)
+ .Case("apple_objc", &AppleObjCSection)
+ .Default(nullptr);
+}
+
+StringRef *DWARFContextInMemory::mapSectionToMember(StringRef Name) {
+ if (DWARFSection *Sec = mapNameToDWARFSection(Name))
+ return &Sec->Data;
return StringSwitch<StringRef *>(Name)
- .Case("debug_info", &InfoSection.Data)
.Case("debug_abbrev", &AbbrevSection)
- .Case("debug_loc", &LocSection.Data)
- .Case("debug_line", &LineSection.Data)
.Case("debug_aranges", &ARangeSection)
.Case("debug_frame", &DebugFrameSection)
.Case("eh_frame", &EHFrameSection)
.Case("debug_str", &StringSection)
- .Case("debug_str_offsets", &StringOffsetSection.Data)
- .Case("debug_ranges", &RangeSection.Data)
.Case("debug_macinfo", &MacinfoSection)
.Case("debug_pubnames", &PubNamesSection)
.Case("debug_pubtypes", &PubTypesSection)
.Case("debug_gnu_pubnames", &GnuPubNamesSection)
.Case("debug_gnu_pubtypes", &GnuPubTypesSection)
- .Case("debug_info.dwo", &InfoDWOSection.Data)
.Case("debug_abbrev.dwo", &AbbrevDWOSection)
- .Case("debug_loc.dwo", &LocDWOSection.Data)
- .Case("debug_line.dwo", &LineDWOSection.Data)
.Case("debug_str.dwo", &StringDWOSection)
- .Case("debug_str_offsets.dwo", &StringOffsetDWOSection.Data)
- .Case("debug_addr", &AddrSection.Data)
- .Case("apple_names", &AppleNamesSection.Data)
- .Case("apple_types", &AppleTypesSection.Data)
- .Case("apple_namespaces", &AppleNamespacesSection.Data)
- .Case("apple_namespac", &AppleNamespacesSection.Data)
- .Case("apple_objc", &AppleObjCSection.Data)
.Case("debug_cu_index", &CUIndexSection)
.Case("debug_tu_index", &TUIndexSection)
.Case("gdb_index", &GdbIndexSection)
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index ef416f72ad175..111f0bbd44448 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -395,7 +395,7 @@ DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End) :
void DWARFDie::attribute_iterator::updateForIndex(
const DWARFAbbreviationDeclaration &AbbrDecl, uint32_t I) {
Index = I;
- // AbbrDecl must be valid befor calling this function.
+ // AbbrDecl must be valid before calling this function.
auto NumAttrs = AbbrDecl.getNumAttributes();
if (Index < NumAttrs) {
AttrValue.Attr = AbbrDecl.getAttrByIndex(Index);
diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt
index e9fd29ccc4caf..ff01c948e0997 100644
--- a/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/lib/DebugInfo/PDB/CMakeLists.txt
@@ -41,6 +41,7 @@ add_pdb_impl_folder(Native
Native/InfoStream.cpp
Native/InfoStreamBuilder.cpp
Native/ModuleDebugStream.cpp
+ Native/NativeBuiltinSymbol.cpp
Native/NativeCompilandSymbol.cpp
Native/NativeEnumModules.cpp
Native/NativeExeSymbol.cpp
@@ -53,6 +54,7 @@ add_pdb_impl_folder(Native
Native/PDBStringTableBuilder.cpp
Native/PDBTypeServerHandler.cpp
Native/PublicsStream.cpp
+ Native/PublicsStreamBuilder.cpp
Native/RawError.cpp
Native/SymbolStream.cpp
Native/TpiHashing.cpp
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index 745dd742aadc3..897f78c510322 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -65,6 +65,10 @@ void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) {
ObjFileName = Name;
}
+void DbiModuleDescriptorBuilder::setPdbFilePathNI(uint32_t NI) {
+ PdbFilePathNI = NI;
+}
+
void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) {
Symbols.push_back(Symbol);
// Symbols written to a PDB file are required to be 4 byte aligned. The same
@@ -111,7 +115,7 @@ void DbiModuleDescriptorBuilder::finalize() {
(void)Layout.Mod; // Set in constructor
(void)Layout.ModDiStream; // Set in finalizeMsfLayout
Layout.NumFiles = SourceFiles.size();
- Layout.PdbFilePathNI = 0;
+ Layout.PdbFilePathNI = PdbFilePathNI;
Layout.SrcFileNameNI = 0;
// This value includes both the signature field and the record bytes
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index a1f0671dec3e6..0eeac7e4c0847 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -225,6 +225,10 @@ void DbiStream::visitSectionContributions(
}
}
+Expected<StringRef> DbiStream::getECName(uint32_t NI) const {
+ return ECNames.getStringForID(NI);
+}
+
Error DbiStream::initializeSectionContributionData() {
if (SecContrSubstream.empty())
return Error::success();
@@ -248,6 +252,9 @@ Error DbiStream::initializeSectionHeadersData() {
return Error::success();
uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::SectionHdr);
+ if (StreamNum == kInvalidStreamIndex)
+ return Error::success();
+
if (StreamNum >= Pdb.getNumStreams())
return make_error<RawError>(raw_error_code::no_stream);
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index aad247ea185f2..25076e40fc98c 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -49,9 +49,17 @@ void DbiStreamBuilder::setSectionMap(ArrayRef<SecMapEntry> SecMap) {
SectionMap = SecMap;
}
+void DbiStreamBuilder::setSymbolRecordStreamIndex(uint32_t Index) {
+ SymRecordStreamIndex = Index;
+}
+
+void DbiStreamBuilder::setPublicsStreamIndex(uint32_t Index) {
+ PublicsStreamIndex = Index;
+}
+
Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type,
ArrayRef<uint8_t> Data) {
- if (DbgStreams[(int)Type].StreamNumber)
+ if (DbgStreams[(int)Type].StreamNumber != kInvalidStreamIndex)
return make_error<RawError>(raw_error_code::duplicate_entry,
"The specified stream type already exists");
auto ExpectedIndex = Msf.addStream(Data.size());
@@ -63,11 +71,16 @@ Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type,
return Error::success();
}
+uint32_t DbiStreamBuilder::addECName(StringRef Name) {
+ return ECNamesBuilder.insert(Name);
+}
+
uint32_t DbiStreamBuilder::calculateSerializedLength() const {
// For now we only support serializing the header.
return sizeof(DbiStreamHeader) + calculateFileInfoSubstreamSize() +
calculateModiSubstreamSize() + calculateSectionContribsStreamSize() +
- calculateSectionMapStreamSize() + calculateDbgStreamsSize();
+ calculateSectionMapStreamSize() + calculateDbgStreamsSize() +
+ ECNamesBuilder.calculateSerializedSize();
}
Expected<DbiModuleDescriptorBuilder &>
@@ -247,15 +260,15 @@ Error DbiStreamBuilder::finalize() {
H->PdbDllVersion = PdbDllVersion;
H->MachineType = static_cast<uint16_t>(MachineType);
- H->ECSubstreamSize = 0;
+ H->ECSubstreamSize = ECNamesBuilder.calculateSerializedSize();
H->FileInfoSize = FileInfoBuffer.getLength();
H->ModiSubstreamSize = calculateModiSubstreamSize();
H->OptionalDbgHdrSize = DbgStreams.size() * sizeof(uint16_t);
H->SecContrSubstreamSize = calculateSectionContribsStreamSize();
H->SectionMapSize = calculateSectionMapStreamSize();
H->TypeServerSize = 0;
- H->SymRecordStreamIndex = kInvalidStreamIndex;
- H->PublicSymbolStreamIndex = kInvalidStreamIndex;
+ H->SymRecordStreamIndex = SymRecordStreamIndex;
+ H->PublicSymbolStreamIndex = PublicsStreamIndex;
H->MFCTypeServerIndex = kInvalidStreamIndex;
H->GlobalSymbolStreamIndex = kInvalidStreamIndex;
@@ -383,6 +396,9 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (auto EC = Writer.writeStreamRef(FileInfoBuffer))
return EC;
+ if (auto EC = ECNamesBuilder.commit(Writer))
+ return EC;
+
for (auto &Stream : DbgStreams)
if (auto EC = Writer.writeInteger(Stream.StreamNumber))
return EC;
diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index 354b8c0e07ff5..6cdf6dde04d9f 100644
--- a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -86,7 +86,8 @@ Error NamedStreamMap::commit(BinaryStreamWriter &Writer) const {
for (const auto &Name : OrderedStreamNames) {
auto Item = Mapping.find(Name);
- assert(Item != Mapping.end());
+ if (Item == Mapping.end())
+ continue;
if (auto EC = Writer.writeCString(Item->getKey()))
return EC;
}
@@ -108,7 +109,8 @@ uint32_t NamedStreamMap::finalize() {
for (const auto &Name : OrderedStreamNames) {
auto Item = Mapping.find(Name);
- assert(Item != Mapping.end());
+ if (Item == Mapping.end())
+ continue;
FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item->getValue());
FinalizedInfo->StringDataBytes += Item->getKeyLength() + 1;
}
diff --git a/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp
new file mode 100644
index 0000000000000..60416f69e137c
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp
@@ -0,0 +1,48 @@
+//===- NativeBuiltinSymbol.cpp --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h"
+
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+NativeBuiltinSymbol::NativeBuiltinSymbol(NativeSession &PDBSession,
+ SymIndexId Id, PDB_BuiltinType T,
+ uint64_t L)
+ : NativeRawSymbol(PDBSession, Id), Session(PDBSession), Type(T), Length(L) {
+}
+
+NativeBuiltinSymbol::~NativeBuiltinSymbol() {}
+
+std::unique_ptr<NativeRawSymbol> NativeBuiltinSymbol::clone() const {
+ return llvm::make_unique<NativeBuiltinSymbol>(Session, SymbolId, Type, Length);
+}
+
+void NativeBuiltinSymbol::dump(raw_ostream &OS, int Indent) const {
+ // TODO: Apparently nothing needs this yet.
+}
+
+PDB_SymType NativeBuiltinSymbol::getSymTag() const {
+ return PDB_SymType::BuiltinType;
+}
+
+PDB_BuiltinType NativeBuiltinSymbol::getBuiltinType() const { return Type; }
+
+bool NativeBuiltinSymbol::isConstType() const { return false; }
+
+uint64_t NativeBuiltinSymbol::getLength() const { return Length; }
+
+bool NativeBuiltinSymbol::isUnalignedType() const { return false; }
+
+bool NativeBuiltinSymbol::isVolatileType() const { return false; }
+
+} // namespace pdb
+} // namespace llvm
diff --git a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
index 180c169ec209c..7132a99a9f160 100644
--- a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -15,7 +15,7 @@ namespace llvm {
namespace pdb {
NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session,
- uint32_t SymbolId,
+ SymIndexId SymbolId,
DbiModuleDescriptor MI)
: NativeRawSymbol(Session, SymbolId), Module(MI) {}
diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
index 6206155b9fb64..cb0830f453c8c 100644
--- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -18,7 +18,7 @@
namespace llvm {
namespace pdb {
-NativeExeSymbol::NativeExeSymbol(NativeSession &Session, uint32_t SymbolId)
+NativeExeSymbol::NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId)
: NativeRawSymbol(Session, SymbolId), File(Session.getPDBFile()) {}
std::unique_ptr<NativeRawSymbol> NativeExeSymbol::clone() const {
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index b4f5c96ce66be..92612bcea4ac4 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -13,7 +13,7 @@
using namespace llvm;
using namespace llvm::pdb;
-NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, uint32_t SymbolId)
+NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId)
: Session(PDBSession), SymbolId(SymbolId) {}
void NativeRawSymbol::dump(raw_ostream &OS, int Indent) const {}
diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 93d43d9ef341f..76de0d8f9e7ef 100644
--- a/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -10,9 +10,11 @@
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
@@ -33,6 +35,28 @@ using namespace llvm;
using namespace llvm::msf;
using namespace llvm::pdb;
+namespace {
+// Maps codeview::SimpleTypeKind of a built-in type to the parameters necessary
+// to instantiate a NativeBuiltinSymbol for that type.
+static const struct BuiltinTypeEntry {
+ codeview::SimpleTypeKind Kind;
+ PDB_BuiltinType Type;
+ uint32_t Size;
+} BuiltinTypes[] = {
+ {codeview::SimpleTypeKind::Int32, PDB_BuiltinType::Int, 4},
+ {codeview::SimpleTypeKind::UInt32, PDB_BuiltinType::UInt, 4},
+ {codeview::SimpleTypeKind::UInt32Long, PDB_BuiltinType::UInt, 4},
+ {codeview::SimpleTypeKind::UInt64Quad, PDB_BuiltinType::UInt, 8},
+ {codeview::SimpleTypeKind::NarrowCharacter, PDB_BuiltinType::Char, 1},
+ {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1},
+ {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1},
+ {codeview::SimpleTypeKind::UInt16Short, PDB_BuiltinType::UInt, 2},
+ {codeview::SimpleTypeKind::Boolean8, PDB_BuiltinType::Bool, 1}
+ // This table can be grown as necessary, but these are the only types we've
+ // needed so far.
+};
+} // namespace
+
NativeSession::NativeSession(std::unique_ptr<PDBFile> PdbFile,
std::unique_ptr<BumpPtrAllocator> Allocator)
: Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {}
@@ -71,19 +95,51 @@ Error NativeSession::createFromExe(StringRef Path,
std::unique_ptr<PDBSymbolCompiland>
NativeSession::createCompilandSymbol(DbiModuleDescriptor MI) {
- const auto Id = static_cast<uint32_t>(SymbolCache.size());
+ const auto Id = static_cast<SymIndexId>(SymbolCache.size());
SymbolCache.push_back(
llvm::make_unique<NativeCompilandSymbol>(*this, Id, MI));
return llvm::make_unique<PDBSymbolCompiland>(
*this, std::unique_ptr<IPDBRawSymbol>(SymbolCache[Id]->clone()));
}
+SymIndexId NativeSession::findSymbolByTypeIndex(codeview::TypeIndex Index) {
+ // First see if it's already in our cache.
+ const auto Entry = TypeIndexToSymbolId.find(Index);
+ if (Entry != TypeIndexToSymbolId.end())
+ return Entry->second;
+
+ // Symbols for built-in types are created on the fly.
+ if (Index.isSimple()) {
+ // FIXME: We will eventually need to handle pointers to other simple types,
+ // which are still simple types in the world of CodeView TypeIndexes.
+ if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct)
+ return 0;
+ const auto Kind = Index.getSimpleKind();
+ const auto It =
+ std::find_if(std::begin(BuiltinTypes), std::end(BuiltinTypes),
+ [Kind](const BuiltinTypeEntry &Builtin) {
+ return Builtin.Kind == Kind;
+ });
+ if (It == std::end(BuiltinTypes))
+ return 0;
+ SymIndexId Id = SymbolCache.size();
+ SymbolCache.emplace_back(
+ llvm::make_unique<NativeBuiltinSymbol>(*this, Id, It->Type, It->Size));
+ TypeIndexToSymbolId[Index] = Id;
+ return Id;
+ }
+
+ // TODO: Look up PDB type by type index
+
+ return 0;
+}
+
uint64_t NativeSession::getLoadAddress() const { return 0; }
void NativeSession::setLoadAddress(uint64_t Address) {}
std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() {
- const auto Id = static_cast<uint32_t>(SymbolCache.size());
+ const auto Id = static_cast<SymIndexId>(SymbolCache.size());
SymbolCache.push_back(llvm::make_unique<NativeExeSymbol>(*this, Id));
auto RawSymbol = SymbolCache[Id]->clone();
auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
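[Editor's note] findSymbolByTypeIndex is a lookup-or-create cache: consult the index-to-id map first, otherwise materialize a symbol, append it to the cache, and remember its id. The core idiom in isolation (std::unordered_map standing in for the DenseMap the patch uses):

#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>

using SymIndexId = uint32_t;
struct Symbol { SymIndexId Id; };

struct SymbolTable {
  std::vector<std::unique_ptr<Symbol>> Cache;
  std::unordered_map<uint32_t, SymIndexId> KeyToId;

  // Return the cached id for Key, materializing a Symbol on first use;
  // the id is simply the symbol's position in the cache vector.
  SymIndexId getOrCreate(uint32_t Key) {
    auto It = KeyToId.find(Key);
    if (It != KeyToId.end())
      return It->second;
    SymIndexId Id = static_cast<SymIndexId>(Cache.size());
    Cache.push_back(std::make_unique<Symbol>(Symbol{Id}));
    KeyToId[Key] = Id;
    return Id;
  }
};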
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 4f6ebb0cb3428..0b6492efc70f3 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -385,8 +385,11 @@ bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
bool PDBFile::hasPDBGlobalsStream() {
auto DbiS = getPDBDbiStream();
- if (!DbiS)
+ if (!DbiS) {
+ consumeError(DbiS.takeError());
return false;
+ }
+
return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
}
@@ -396,8 +399,10 @@ bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); }
bool PDBFile::hasPDBPublicsStream() {
auto DbiS = getPDBDbiStream();
- if (!DbiS)
+ if (!DbiS) {
+ consumeError(DbiS.takeError());
return false;
+ }
return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
}
diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index 12b0c3b36c1dd..9f35fd73629cd 100644
--- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -18,6 +18,7 @@
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
@@ -33,6 +34,8 @@ using namespace llvm::support;
PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator)
: Allocator(Allocator) {}
+PDBFileBuilder::~PDBFileBuilder() {}
+
Error PDBFileBuilder::initialize(uint32_t BlockSize) {
auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize);
if (!ExpectedMsf)
@@ -71,6 +74,12 @@ PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() {
return Strings;
}
+PublicsStreamBuilder &PDBFileBuilder::getPublicsBuilder() {
+ if (!Publics)
+ Publics = llvm::make_unique<PublicsStreamBuilder>(*Msf);
+ return *Publics;
+}
+
Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) {
auto ExpectedStream = Msf->addStream(Size);
if (!ExpectedStream)
@@ -96,8 +105,6 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() {
return std::move(EC);
if (auto EC = addNamedStream("/LinkInfo", 0))
return std::move(EC);
- if (auto EC = addNamedStream("/src/headerblock", 0))
- return std::move(EC);
if (Info) {
if (auto EC = Info->finalizeMsfLayout())
@@ -115,6 +122,14 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() {
if (auto EC = Ipi->finalizeMsfLayout())
return std::move(EC);
}
+ if (Publics) {
+ if (auto EC = Publics->finalizeMsfLayout())
+ return std::move(EC);
+ if (Dbi) {
+ Dbi->setPublicsStreamIndex(Publics->getStreamIndex());
+ Dbi->setSymbolRecordStreamIndex(Publics->getRecordStreamIdx());
+ }
+ }
return Msf->build();
}
@@ -194,5 +209,13 @@ Error PDBFileBuilder::commit(StringRef Filename) {
return EC;
}
+ if (Publics) {
+ auto PS = WritableMappedBlockStream::createIndexedStream(
+ Layout, Buffer, Publics->getStreamIndex(), Allocator);
+ BinaryStreamWriter PSWriter(*PS);
+ if (auto EC = Publics->commit(PSWriter))
+ return EC;
+ }
+
return Buffer.commit();
}
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
index f9f8ac219d357..acd45f7a62192 100644
--- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
using namespace llvm::support;
using namespace llvm::pdb;
-uint32_t PDBStringTable::getByteSize() const { return ByteSize; }
+uint32_t PDBStringTable::getByteSize() const { return Header->ByteSize; }
uint32_t PDBStringTable::getNameCount() const { return NameCount; }
uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; }
uint32_t PDBStringTable::getSignature() const { return Header->Signature; }
diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index 8f3474b9ce190..9c3e654f808ba 100644
--- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -41,19 +41,6 @@ using namespace llvm::msf;
using namespace llvm::support;
using namespace llvm::pdb;
-// This is PSGSIHDR struct defined in
-// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h
-struct PublicsStream::HeaderInfo {
- ulittle32_t SymHash;
- ulittle32_t AddrMap;
- ulittle32_t NumThunks;
- ulittle32_t SizeOfThunk;
- ulittle16_t ISectThunkTable;
- char Padding[2];
- ulittle32_t OffThunkTable;
- ulittle32_t NumSections;
-};
-
PublicsStream::PublicsStream(PDBFile &File,
std::unique_ptr<MappedBlockStream> Stream)
: Pdb(File), Stream(std::move(Stream)) {}
@@ -72,7 +59,8 @@ Error PublicsStream::reload() {
BinaryStreamReader Reader(*Stream);
// Check stream size.
- if (Reader.bytesRemaining() < sizeof(HeaderInfo) + sizeof(GSIHashHeader))
+ if (Reader.bytesRemaining() <
+ sizeof(PublicsStreamHeader) + sizeof(GSIHashHeader))
return make_error<RawError>(raw_error_code::corrupt_file,
"Publics Stream does not contain a header.");
diff --git a/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp
new file mode 100644
index 0000000000000..28c4a8fc35d92
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp
@@ -0,0 +1,89 @@
+//===- PublicsStreamBuilder.cpp - PDB Publics Stream Creation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h"
+
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+
+#include "GSI.h"
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::pdb;
+
+PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf) : Msf(Msf) {}
+
+PublicsStreamBuilder::~PublicsStreamBuilder() {}
+
+uint32_t PublicsStreamBuilder::calculateSerializedLength() const {
+ uint32_t Size = 0;
+ Size += sizeof(PublicsStreamHeader);
+ Size += sizeof(GSIHashHeader);
+ Size += HashRecords.size() * sizeof(PSHashRecord);
+ size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
+ uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
+ Size += NumBitmapEntries;
+
+ // FIXME: Account for hash buckets. For now, since we write a zero bitmap
+ // indicating that no hash buckets are valid, we also write zero bytes of
+ // hash bucket data.
+ Size += 0;
+ return Size;
+}
+
+Error PublicsStreamBuilder::finalizeMsfLayout() {
+ Expected<uint32_t> Idx = Msf.addStream(calculateSerializedLength());
+ if (!Idx)
+ return Idx.takeError();
+ StreamIdx = *Idx;
+
+ Expected<uint32_t> RecordIdx = Msf.addStream(0);
+ if (!RecordIdx)
+ return RecordIdx.takeError();
+ RecordStreamIdx = *RecordIdx;
+ return Error::success();
+}
+
+Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter) {
+ PublicsStreamHeader PSH;
+ GSIHashHeader GSH;
+
+ // FIXME: Figure out what to put for these values.
+ PSH.AddrMap = 0;
+ PSH.ISectThunkTable = 0;
+ PSH.NumSections = 0;
+ PSH.NumThunks = 0;
+ PSH.OffThunkTable = 0;
+ PSH.SizeOfThunk = 0;
+ PSH.SymHash = 0;
+
+ GSH.VerSignature = GSIHashHeader::HdrSignature;
+ GSH.VerHdr = GSIHashHeader::HdrVersion;
+ GSH.HrSize = 0;
+ GSH.NumBuckets = 0;
+
+ if (auto EC = PublicsWriter.writeObject(PSH))
+ return EC;
+ if (auto EC = PublicsWriter.writeObject(GSH))
+ return EC;
+ if (auto EC = PublicsWriter.writeArray(makeArrayRef(HashRecords)))
+ return EC;
+
+ size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
+ uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
+ std::vector<uint8_t> BitmapData(NumBitmapEntries);
+ // FIXME: Build an actual bitmap
+ if (auto EC = PublicsWriter.writeBytes(makeArrayRef(BitmapData)))
+ return EC;
+
+ // FIXME: Write actual hash buckets.
+ return Error::success();
+}
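[Editor's note] Downstream, a producer reaches the new builder only through PDBFileBuilder, whose layout finalization reserves the publics and symbol-record streams and wires their indices into the DBI header. A hedged usage sketch (MSF/Info/Dbi/Tpi setup elided; assumes the PDBFileBuilder API shown in this patch):

#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"

// Sketch only: a real producer must also configure the Info, Dbi and Tpi
// builders before committing. getPublicsBuilder() lazily creates the
// builder; commit() then finalizes the MSF layout, which allocates the
// publics and symbol-record streams and records their indices in the DBI
// stream builder (see finalizeMsfLayout above).
llvm::Error emitWithEmptyPublics(llvm::pdb::PDBFileBuilder &Builder,
                                 llvm::StringRef Path) {
  (void)Builder.getPublicsBuilder();
  return Builder.commit(Path);
}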
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index ff8749fbfed48..1164d60ffc104 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -317,7 +317,13 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
raw_string_ostream MangledNameStream(MangledName);
Mangler::getNameWithPrefix(MangledNameStream, Name, getDataLayout());
}
- return findSymbol(MangledName, CheckFunctionsOnly).getAddress();
+ if (auto Sym = findSymbol(MangledName, CheckFunctionsOnly)) {
+ if (auto AddrOrErr = Sym.getAddress())
+ return *AddrOrErr;
+ else
+ report_fatal_error(AddrOrErr.takeError());
+ } else
+ report_fatal_error(Sym.takeError());
}
JITSymbol MCJIT::findSymbol(const std::string &Name,
@@ -599,11 +605,12 @@ GenericValue MCJIT::runFunction(Function *F, ArrayRef<GenericValue> ArgValues) {
void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) {
if (!isSymbolSearchingDisabled()) {
- void *ptr =
- reinterpret_cast<void*>(
- static_cast<uintptr_t>(Resolver.findSymbol(Name).getAddress()));
- if (ptr)
- return ptr;
+ if (auto Sym = Resolver.findSymbol(Name)) {
+ if (auto AddrOrErr = Sym.getAddress())
+ return reinterpret_cast<void*>(
+ static_cast<uintptr_t>(*AddrOrErr));
+ else
+ report_fatal_error(AddrOrErr.takeError());
+ } else if (auto Err = Sym.takeError())
+ report_fatal_error(std::move(Err));
}
/// If a LazyFunctionCreator is installed, use it to get/create the function.
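[Editor's note] Both call sites adapt to JITSymbol::getAddress now returning Expected<JITTargetAddress> instead of a raw address. The general idiom for consuming such an API, shown standalone (lookupAddress is a hypothetical declaration standing in for the JIT query):

#include "llvm/Support/Error.h"
#include <cstdint>

// Hypothetical Expected-returning lookup in the style of
// JITSymbol::getAddress(); only the handling pattern matters here.
llvm::Expected<uint64_t> lookupAddress(const char *Name);

uint64_t lookupOrDie(const char *Name) {
  if (auto AddrOrErr = lookupAddress(Name))
    return *AddrOrErr; // success: dereference the Expected
  else
    llvm::report_fatal_error(AddrOrErr.takeError());
}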
diff --git a/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
index 5fe259f80b6fb..de80cb1d0dd4c 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindings.cpp
+++ b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
@@ -60,12 +60,13 @@ void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName,
void LLVMOrcDisposeMangledSymbol(char *MangledName) { delete[] MangledName; }
-LLVMOrcTargetAddress
+LLVMOrcErrorCode
LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
LLVMOrcLazyCompileCallbackFn Callback,
void *CallbackCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.createLazyCompileCallback(Callback, CallbackCtx);
+ return J.createLazyCompileCallback(*RetAddr, Callback, CallbackCtx);
}
LLVMOrcErrorCode LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
@@ -82,38 +83,44 @@ LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
return J.setIndirectStubPointer(StubName, NewAddr);
}
-LLVMOrcModuleHandle
+LLVMOrcErrorCode
LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
LLVMSharedModuleRef Mod,
LLVMOrcSymbolResolverFn SymbolResolver,
void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
std::shared_ptr<Module> *M(unwrap(Mod));
- return J.addIRModuleEager(*M, SymbolResolver, SymbolResolverCtx);
+ return J.addIRModuleEager(*RetHandle, *M, SymbolResolver, SymbolResolverCtx);
}
-LLVMOrcModuleHandle
+LLVMOrcErrorCode
LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
LLVMSharedModuleRef Mod,
LLVMOrcSymbolResolverFn SymbolResolver,
void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
std::shared_ptr<Module> *M(unwrap(Mod));
- return J.addIRModuleLazy(*M, SymbolResolver, SymbolResolverCtx);
+ return J.addIRModuleLazy(*RetHandle, *M, SymbolResolver, SymbolResolverCtx);
}
-void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H) {
+LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle H) {
OrcCBindingsStack &J = *unwrap(JITStack);
- J.removeModule(H);
+ return J.removeModule(H);
}
-LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
- const char *SymbolName) {
+LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
+ const char *SymbolName) {
OrcCBindingsStack &J = *unwrap(JITStack);
- auto Sym = J.findSymbol(SymbolName, true);
- return Sym.getAddress();
+ return J.findSymbolAddress(*RetAddr, SymbolName, true);
}
-void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) {
- delete unwrap(JITStack);
+LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) {
+ auto *J = unwrap(JITStack);
+ auto Err = J->shutdown();
+ delete J;
+ return Err;
}
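
Every entry point above now returns LLVMOrcErrorCode and hands its result back through an out-parameter. A caller sketch under the revised bindings (the JIT stack, module, and resolver arguments are assumed to exist; LLVMOrcGetErrorMsg is the error-string accessor from OrcBindings.h):

    #include "llvm-c/OrcBindings.h"
    #include <cstdio>

    void addAndFindMain(LLVMOrcJITStackRef JIT, LLVMSharedModuleRef Mod,
                        LLVMOrcSymbolResolverFn Resolver, void *Ctx) {
      LLVMOrcModuleHandle H;
      LLVMOrcTargetAddress MainAddr;
      if (LLVMOrcAddEagerlyCompiledIR(JIT, &H, Mod, Resolver, Ctx) !=
          LLVMOrcErrSuccess) {
        fprintf(stderr, "add failed: %s\n", LLVMOrcGetErrorMsg(JIT));
        return;
      }
      if (LLVMOrcGetSymbolAddress(JIT, &MainAddr, "main") != LLVMOrcErrSuccess)
        fprintf(stderr, "lookup failed: %s\n", LLVMOrcGetErrorMsg(JIT));
      else if (MainAddr == 0)
        fprintf(stderr, "no symbol 'main'\n"); // success with a null address
    }
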
diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index 931d0a9eb2ade..e38decf94f3e9 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -70,7 +70,7 @@ private:
virtual JITSymbol findSymbolIn(const std::string &Name,
bool ExportedSymbolsOnly) = 0;
- virtual void removeModule() = 0;
+ virtual Error removeModule() = 0;
};
template <typename LayerT> class GenericHandleImpl : public GenericHandle {
@@ -83,7 +83,7 @@ private:
return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly);
}
- void removeModule() override { return Layer.removeModule(Handle); }
+ Error removeModule() override { return Layer.removeModule(Handle); }
private:
LayerT &Layer;
@@ -105,6 +105,10 @@ public:
IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
: DL(TM.createDataLayout()), IndirectStubsMgr(IndirectStubsMgrBuilder()),
CCMgr(std::move(CCMgr)),
+ ObjectLayer(
+ []() {
+ return std::make_shared<SectionMemoryManager>();
+ }),
CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
CODLayer(CompileLayer,
[](Function &F) { return std::set<Function *>({&F}); },
@@ -112,12 +116,14 @@ public:
CXXRuntimeOverrides(
[this](const std::string &S) { return mangle(S); }) {}
- ~OrcCBindingsStack() {
+ LLVMOrcErrorCode shutdown() {
// Run any destructors registered with __cxa_atexit.
CXXRuntimeOverrides.runDestructors();
// Run any IR destructors.
for (auto &DtorRunner : IRStaticDestructorRunners)
- DtorRunner.runViaLayer(*this);
+ if (auto Err = DtorRunner.runViaLayer(*this))
+ return mapError(std::move(Err));
+ return LLVMOrcErrSuccess;
}
std::string mangle(StringRef Name) {
@@ -134,14 +140,17 @@ public:
return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
}
- JITTargetAddress
- createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback,
+
+ LLVMOrcErrorCode
+ createLazyCompileCallback(JITTargetAddress &RetAddr,
+ LLVMOrcLazyCompileCallbackFn Callback,
void *CallbackCtx) {
auto CCInfo = CCMgr->getCompileCallback();
CCInfo.setCompileAction([=]() -> JITTargetAddress {
return Callback(wrap(this), CallbackCtx);
});
- return CCInfo.getAddress();
+ RetAddr = CCInfo.getAddress();
+ return LLVMOrcErrSuccess;
}
LLVMOrcErrorCode createIndirectStub(StringRef StubName,
@@ -155,12 +164,12 @@ public:
return mapError(IndirectStubsMgr->updatePointer(Name, Addr));
}
- std::unique_ptr<JITSymbolResolver>
+ std::shared_ptr<JITSymbolResolver>
createResolver(LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
return orc::createLambdaResolver(
[this, ExternalResolver, ExternalResolverCtx](const std::string &Name)
- -> JITSymbol {
+ -> JITSymbol {
// Search order:
// 1. JIT'd symbols.
// 2. Runtime overrides.
@@ -168,6 +177,9 @@ public:
if (auto Sym = CODLayer.findSymbol(Name, true))
return Sym;
+ else if (auto Err = Sym.takeError())
+ return Sym.takeError();
+
if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name))
return Sym;
@@ -178,16 +190,19 @@ public:
return JITSymbol(nullptr);
},
- [](const std::string &Name) {
+ [](const std::string &Name) -> JITSymbol {
return JITSymbol(nullptr);
});
}
template <typename LayerT>
- ModuleHandleT addIRModule(LayerT &Layer, std::shared_ptr<Module> M,
- std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
+ LLVMOrcErrorCode
+ addIRModule(ModuleHandleT &RetHandle, LayerT &Layer,
+ std::shared_ptr<Module> M,
+ std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+
// Attach a data-layout if one isn't already present.
if (M->getDataLayout().isDefault())
M->setDataLayout(DL);
@@ -204,43 +219,52 @@ public:
auto Resolver = createResolver(ExternalResolver, ExternalResolverCtx);
// Add the module to the JIT.
- auto LH = Layer.addModule(std::move(M), std::move(MemMgr),
- std::move(Resolver));
- ModuleHandleT H = createHandle(Layer, LH);
+ ModuleHandleT H;
+ if (auto LHOrErr = Layer.addModule(std::move(M), std::move(Resolver)))
+ H = createHandle(Layer, *LHOrErr);
+ else
+ return mapError(LHOrErr.takeError());
// Run the static constructors, and save the static destructor runner for
// execution when the JIT is torn down.
orc::CtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), H);
- CtorRunner.runViaLayer(*this);
+ if (auto Err = CtorRunner.runViaLayer(*this))
+ return mapError(std::move(Err));
IRStaticDestructorRunners.emplace_back(std::move(DtorNames), H);
- return H;
+ RetHandle = H;
+ return LLVMOrcErrSuccess;
}
- ModuleHandleT addIRModuleEager(std::shared_ptr<Module> M,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
- return addIRModule(CompileLayer, std::move(M),
+ LLVMOrcErrorCode addIRModuleEager(ModuleHandleT &RetHandle,
+ std::shared_ptr<Module> M,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+ return addIRModule(RetHandle, CompileLayer, std::move(M),
llvm::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
- ModuleHandleT addIRModuleLazy(std::shared_ptr<Module> M,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
- return addIRModule(CODLayer, std::move(M),
+ LLVMOrcErrorCode addIRModuleLazy(ModuleHandleT &RetHandle,
+ std::shared_ptr<Module> M,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+ return addIRModule(RetHandle, CODLayer, std::move(M),
llvm::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
- void removeModule(ModuleHandleT H) {
- GenericHandles[H]->removeModule();
+ LLVMOrcErrorCode removeModule(ModuleHandleT H) {
+ if (auto Err = GenericHandles[H]->removeModule())
+ return mapError(std::move(Err));
GenericHandles[H] = nullptr;
FreeHandleIndexes.push_back(H);
+ return LLVMOrcErrSuccess;
}
- JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+ JITSymbol findSymbol(const std::string &Name,
+ bool ExportedSymbolsOnly) {
if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly))
return Sym;
return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
@@ -251,6 +275,26 @@ public:
return GenericHandles[H]->findSymbolIn(Name, ExportedSymbolsOnly);
}
+ LLVMOrcErrorCode findSymbolAddress(JITTargetAddress &RetAddr,
+ const std::string &Name,
+ bool ExportedSymbolsOnly) {
+ RetAddr = 0;
+ if (auto Sym = findSymbol(Name, ExportedSymbolsOnly)) {
+ // Successful lookup, non-null symbol:
+ if (auto AddrOrErr = Sym.getAddress()) {
+ RetAddr = *AddrOrErr;
+ return LLVMOrcErrSuccess;
+ } else
+ return mapError(AddrOrErr.takeError());
+ } else if (auto Err = Sym.takeError()) {
+ // Lookup failure - report error.
+ return mapError(std::move(Err));
+ }
+ // Otherwise we had a successful lookup but got a null result. We already
+ // set RetAddr to '0' above, so just return success.
+ return LLVMOrcErrSuccess;
+ }
+
const std::string &getErrorMessage() const { return ErrMsg; }
private:
diff --git a/lib/ExecutionEngine/Orc/OrcError.cpp b/lib/ExecutionEngine/Orc/OrcError.cpp
index 9e70c4ac1dbff..df2d320e0f7aa 100644
--- a/lib/ExecutionEngine/Orc/OrcError.cpp
+++ b/lib/ExecutionEngine/Orc/OrcError.cpp
@@ -45,6 +45,8 @@ public:
return "Could not negotiate RPC function";
case OrcErrorCode::RPCResponseAbandoned:
return "RPC response abandoned";
+ case OrcErrorCode::JITSymbolNotFound:
+ return "JIT symbol not found";
case OrcErrorCode::UnexpectedRPCCall:
return "Unexpected RPC call";
case OrcErrorCode::UnexpectedRPCResponse:
@@ -63,10 +65,29 @@ static ManagedStatic<OrcErrorCategory> OrcErrCat;
namespace llvm {
namespace orc {
+char JITSymbolNotFound::ID = 0;
+
std::error_code orcError(OrcErrorCode ErrCode) {
typedef std::underlying_type<OrcErrorCode>::type UT;
return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat);
}
+JITSymbolNotFound::JITSymbolNotFound(std::string SymbolName)
+ : SymbolName(std::move(SymbolName)) {}
+
+std::error_code JITSymbolNotFound::convertToErrorCode() const {
+ typedef std::underlying_type<OrcErrorCode>::type UT;
+ return std::error_code(static_cast<UT>(OrcErrorCode::JITSymbolNotFound),
+ *OrcErrCat);
+}
+
+void JITSymbolNotFound::log(raw_ostream &OS) const {
+ OS << "Could not find symbol '" << SymbolName << "'";
+}
+
+const std::string &JITSymbolNotFound::getSymbolName() const {
+ return SymbolName;
+}
+
}
}
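
JITSymbolNotFound is a regular llvm::Error payload (hence the static ID member and convertToErrorCode above), so it composes with make_error and the handleErrors machinery. A minimal sketch, assuming the declarations this patch adds to OrcError.h:

    #include "llvm/ExecutionEngine/Orc/OrcError.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::orc;

    static Error lookupFoo() {
      // Fail the lookup with a structured error that carries the symbol name.
      return make_error<JITSymbolNotFound>("foo");
    }

    void demo() {
      handleAllErrors(lookupFoo(), [](const JITSymbolNotFound &E) {
        errs() << "missing symbol: " << E.getSymbolName() << "\n";
      });
    }
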
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 690276232a6f8..346a40405ff18 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -172,10 +172,13 @@ public:
std::shared_ptr<JITSymbolResolver> ClientResolver,
std::unique_ptr<TargetMachine> TM)
: ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)),
- MemMgr(*this, std::move(MemMgr)), Resolver(*this),
+ MemMgr(std::make_shared<MCJITReplacementMemMgr>(*this,
+ std::move(MemMgr))),
+ Resolver(std::make_shared<LinkingResolver>(*this)),
ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this),
NotifyFinalized(*this),
- ObjectLayer(NotifyObjectLoaded, NotifyFinalized),
+ ObjectLayer([this]() { return this->MemMgr; }, NotifyObjectLoaded,
+ NotifyFinalized),
CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)),
LazyEmitLayer(CompileLayer) {}
@@ -199,20 +202,20 @@ public:
delete Mod;
};
LocalModules.push_back(std::shared_ptr<Module>(MPtr, std::move(Deleter)));
- LazyEmitLayer.addModule(LocalModules.back(), &MemMgr, &Resolver);
+ cantFail(LazyEmitLayer.addModule(LocalModules.back(), Resolver));
}
void addObjectFile(std::unique_ptr<object::ObjectFile> O) override {
auto Obj =
std::make_shared<object::OwningBinary<object::ObjectFile>>(std::move(O),
nullptr);
- ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver);
+ cantFail(ObjectLayer.addObject(std::move(Obj), Resolver));
}
void addObjectFile(object::OwningBinary<object::ObjectFile> O) override {
auto Obj =
std::make_shared<object::OwningBinary<object::ObjectFile>>(std::move(O));
- ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver);
+ cantFail(ObjectLayer.addObject(std::move(Obj), Resolver));
}
void addArchive(object::OwningBinary<object::Archive> A) override {
@@ -231,7 +234,7 @@ public:
}
uint64_t getSymbolAddress(StringRef Name) {
- return findSymbol(Name).getAddress();
+ return cantFail(findSymbol(Name).getAddress());
}
JITSymbol findSymbol(StringRef Name) {
@@ -320,7 +323,7 @@ private:
auto Obj =
std::make_shared<object::OwningBinary<object::ObjectFile>>(
std::move(ChildObj), nullptr);
- ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver);
+ cantFail(ObjectLayer.addObject(std::move(Obj), Resolver));
if (auto Sym = ObjectLayer.findSymbol(Name, true))
return Sym;
}
@@ -341,7 +344,7 @@ private:
const LoadedObjectInfo &Info) const {
M.UnfinalizedSections[H] = std::move(M.SectionsAllocatedSinceLastLoad);
M.SectionsAllocatedSinceLastLoad = SectionAddrSet();
- M.MemMgr.notifyObjectLoaded(&M, *Obj->getBinary());
+ M.MemMgr->notifyObjectLoaded(&M, *Obj->getBinary());
}
private:
OrcMCJITReplacement &M;
@@ -373,8 +376,8 @@ private:
using LazyEmitLayerT = LazyEmittingLayer<CompileLayerT>;
std::unique_ptr<TargetMachine> TM;
- MCJITReplacementMemMgr MemMgr;
- LinkingResolver Resolver;
+ std::shared_ptr<MCJITReplacementMemMgr> MemMgr;
+ std::shared_ptr<LinkingResolver> Resolver;
std::shared_ptr<JITSymbolResolver> ClientResolver;
Mangler Mang;
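
The layers now share their memory manager and resolver via shared_ptr and return Expected values; call sites that are not allowed to fail unwrap them with cantFail, which asserts success instead of propagating an Error. A small standalone sketch of the cantFail contract:

    #include "llvm/Support/Error.h"
    using namespace llvm;

    static Expected<int> mightFail(bool Fail) {
      if (Fail)
        return make_error<StringError>("boom", inconvertibleErrorCode());
      return 42;
    }

    int demo() {
      // cantFail unwraps the value; on error it reports a fatal error
      // (and asserts in +Asserts builds), so it is only appropriate where
      // failure would be a programming error.
      return cantFail(mightFail(false));
    }
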
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 2b69f1a0269fd..8198836f7a0c9 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -128,7 +128,10 @@ void RuntimeDyldImpl::resolveRelocations() {
);
// First, resolve relocations associated with external symbols.
- resolveExternalSymbols();
+ if (auto Err = resolveExternalSymbols()) {
+ HasError = true;
+ ErrorStr = toString(std::move(Err));
+ }
// Iterate over all outstanding relocations
for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) {
@@ -243,9 +246,11 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
continue;
// Then check the symbol resolver to see if there's a definition
// elsewhere in this logical dylib.
- if (auto Sym = Resolver.findSymbolInLogicalDylib(Name))
+ if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) {
if (Sym.getFlags().isStrongDefinition())
continue;
+ } else if (auto Err = Sym.takeError())
+ return std::move(Err);
// else
JITSymFlags &= ~JITSymbolFlags::Weak;
}
@@ -953,7 +958,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
}
}
-void RuntimeDyldImpl::resolveExternalSymbols() {
+Error RuntimeDyldImpl::resolveExternalSymbols() {
while (!ExternalSymbolRelocations.empty()) {
StringMap<RelocationList>::iterator i = ExternalSymbolRelocations.begin();
@@ -971,10 +976,24 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
// This is an external symbol, try to get its address from the symbol
// resolver.
// First search for the symbol in this logical dylib.
- Addr = Resolver.findSymbolInLogicalDylib(Name.data()).getAddress();
+ if (auto Sym = Resolver.findSymbolInLogicalDylib(Name.data())) {
+ if (auto AddrOrErr = Sym.getAddress())
+ Addr = *AddrOrErr;
+ else
+ return AddrOrErr.takeError();
+ } else if (auto Err = Sym.takeError())
+ return Err;
+
// If that fails, try searching for an external symbol.
- if (!Addr)
- Addr = Resolver.findSymbol(Name.data()).getAddress();
+ if (!Addr) {
+ if (auto Sym = Resolver.findSymbol(Name.data())) {
+ if (auto AddrOrErr = Sym.getAddress())
+ Addr = *AddrOrErr;
+ else
+ return AddrOrErr.takeError();
+ } else if (auto Err = Sym.takeError())
+ return Err;
+ }
// The call to getSymbolAddress may have caused additional modules to
// be loaded, which may have added new entries to the
// ExternalSymbolRelocations map. Consequently, we need to update our
@@ -1009,6 +1028,8 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
ExternalSymbolRelocations.erase(i);
}
+
+ return Error::success();
}
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 1bd28ef37ed1c..1c54ad6fb03f8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -27,9 +27,12 @@ using namespace llvm::object;
namespace {
class LoadedCOFFObjectInfo final
- : public RuntimeDyld::LoadedObjectInfoHelper<LoadedCOFFObjectInfo> {
+ : public LoadedObjectInfoHelper<LoadedCOFFObjectInfo,
+ RuntimeDyld::LoadedObjectInfo> {
public:
- LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap)
+ LoadedCOFFObjectInfo(
+ RuntimeDyldImpl &RTDyld,
+ RuntimeDyld::LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap)
: LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {}
OwningBinary<ObjectFile>
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index e45fdc7aee18a..5bc7434e703f6 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -742,7 +742,7 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const {
uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const {
if (auto InternalSymbol = getRTDyld().getSymbol(Symbol))
return InternalSymbol.getAddress();
- return getRTDyld().Resolver.findSymbol(Symbol).getAddress();
+ return cantFail(getRTDyld().Resolver.findSymbol(Symbol).getAddress());
}
uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr,
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 8b6f9bef66df9..77c968401c160 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -123,7 +123,8 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef,
}
class LoadedELFObjectInfo final
- : public RuntimeDyld::LoadedObjectInfoHelper<LoadedELFObjectInfo> {
+ : public LoadedObjectInfoHelper<LoadedELFObjectInfo,
+ RuntimeDyld::LoadedObjectInfo> {
public:
LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap)
: LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 5268bc5a18684..95b04fd932511 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -417,7 +417,7 @@ protected:
StubMap &Stubs) = 0;
/// \brief Resolve relocations to external symbols.
- void resolveExternalSymbols();
+ Error resolveExternalSymbols();
// \brief Compute an upper bound of the memory that is required to load all
// sections
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 00541e8c06fea..80e9c7ac18aac 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -27,7 +27,8 @@ using namespace llvm::object;
namespace {
class LoadedMachOObjectInfo final
- : public RuntimeDyld::LoadedObjectInfoHelper<LoadedMachOObjectInfo> {
+ : public LoadedObjectInfoHelper<LoadedMachOObjectInfo,
+ RuntimeDyld::LoadedObjectInfo> {
public:
LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld,
ObjSectionToIDMap ObjSecToIDMap)
diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt
index b886021aee3fd..fa743c280e861 100644
--- a/lib/Fuzzer/CMakeLists.txt
+++ b/lib/Fuzzer/CMakeLists.txt
@@ -13,6 +13,7 @@ if( APPLE )
endif()
endif()
+set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}")
if( LLVM_USE_SANITIZE_COVERAGE )
if(NOT "${LLVM_USE_SANITIZER}" STREQUAL "Address")
message(FATAL_ERROR
@@ -20,7 +21,6 @@ if( LLVM_USE_SANITIZE_COVERAGE )
"LLVM_USE_SANITIZE_COVERAGE=YES to be set."
)
endif()
- set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}")
# Disable the coverage and sanitizer instrumentation for the fuzzer itself.
set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror")
diff --git a/lib/Fuzzer/FuzzerCorpus.h b/lib/Fuzzer/FuzzerCorpus.h
index 0f0573994a035..218ae5b6ac4d4 100644
--- a/lib/Fuzzer/FuzzerCorpus.h
+++ b/lib/Fuzzer/FuzzerCorpus.h
@@ -34,6 +34,7 @@ struct InputInfo {
size_t NumExecutedMutations = 0;
size_t NumSuccessfullMutations = 0;
bool MayDeleteFile = false;
+ std::vector<uint32_t> FeatureSet;
};
class InputCorpus {
@@ -68,24 +69,84 @@ class InputCorpus {
}
bool empty() const { return Inputs.empty(); }
const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
- void AddToCorpus(const Unit &U, size_t NumFeatures,
- bool MayDeleteFile = false) {
+ void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
+ const std::vector<uint32_t> &FeatureSet) {
assert(!U.empty());
- uint8_t Hash[kSHA1NumBytes];
if (FeatureDebug)
Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
- ComputeSHA1(U.data(), U.size(), Hash);
- Hashes.insert(Sha1ToString(Hash));
Inputs.push_back(new InputInfo());
InputInfo &II = *Inputs.back();
II.U = U;
II.NumFeatures = NumFeatures;
II.MayDeleteFile = MayDeleteFile;
- memcpy(II.Sha1, Hash, kSHA1NumBytes);
+ II.FeatureSet = FeatureSet;
+ ComputeSHA1(U.data(), U.size(), II.Sha1);
+ Hashes.insert(Sha1ToString(II.Sha1));
UpdateCorpusDistribution();
+ PrintCorpus();
// ValidateFeatureSet();
}
+ // Debug-only
+ void PrintUnit(const Unit &U) {
+ if (!FeatureDebug) return;
+ for (uint8_t C : U) {
+ if (C != 'F' && C != 'U' && C != 'Z')
+ C = '.';
+ Printf("%c", C);
+ }
+ }
+
+ // Debug-only
+ void PrintFeatureSet(const std::vector<uint32_t> &FeatureSet) {
+ if (!FeatureDebug) return;
+ Printf("{");
+ for (uint32_t Feature: FeatureSet)
+ Printf("%u,", Feature);
+ Printf("}");
+ }
+
+ // Debug-only
+ void PrintCorpus() {
+ if (!FeatureDebug) return;
+ Printf("======= CORPUS:\n");
+ int i = 0;
+ for (auto II : Inputs) {
+ if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) {
+ Printf("[%2d] ", i);
+ Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size());
+ PrintUnit(II->U);
+ Printf(" ");
+ PrintFeatureSet(II->FeatureSet);
+ Printf("\n");
+ }
+ i++;
+ }
+ }
+
+ // If FeatureSet is the same as in II, replace II->U with {Data,Size}.
+ bool TryToReplace(InputInfo *II, const uint8_t *Data, size_t Size,
+ const std::vector<uint32_t> &FeatureSet) {
+ if (II->U.size() > Size && II->FeatureSet.size() &&
+ II->FeatureSet == FeatureSet) {
+ if (FeatureDebug)
+ Printf("Replace: %zd => %zd\n", II->U.size(), Size);
+ Replace(II, {Data, Data + Size});
+ PrintCorpus();
+ return true;
+ }
+ return false;
+ }
+
+ void Replace(InputInfo *II, const Unit &U) {
+ assert(II->U.size());
+ Hashes.erase(Sha1ToString(II->Sha1));
+ DeleteFile(*II);
+ ComputeSHA1(U.data(), U.size(), II->Sha1);
+ Hashes.insert(Sha1ToString(II->Sha1));
+ II->U = U;
+ }
+
bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
bool HasUnit(const std::string &H) { return Hashes.count(H); }
InputInfo &ChooseUnitToMutate(Random &Rand) {
@@ -124,10 +185,14 @@ class InputCorpus {
Printf("\n");
}
- void DeleteInput(size_t Idx) {
- InputInfo &II = *Inputs[Idx];
+ void DeleteFile(const InputInfo &II) {
if (!OutputCorpus.empty() && II.MayDeleteFile)
RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1)));
+ }
+
+ void DeleteInput(size_t Idx) {
+ InputInfo &II = *Inputs[Idx];
+ DeleteFile(II);
Unit().swap(II.U);
if (FeatureDebug)
Printf("EVICTED %zd\n", Idx);
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index 0453a7f443b53..87968893853e4 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -265,7 +265,7 @@ int RunOneTest(Fuzzer *F, const char *InputFilePath, size_t MaxLen) {
Unit U = FileToVector(InputFilePath);
if (MaxLen && MaxLen < U.size())
U.resize(MaxLen);
- F->RunOne(U.data(), U.size());
+ F->ExecuteCallback(U.data(), U.size());
F->TryDetectingAMemoryLeak(U.data(), U.size(), true);
return 0;
}
@@ -441,7 +441,6 @@ int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) {
Printf("INFO: The input is small enough, exiting\n");
exit(0);
}
- Corpus->AddToCorpus(U, 0);
F->SetMaxInputLen(U.size());
F->SetMaxMutationLen(U.size() - 1);
F->MinimizeCrashLoop(U);
@@ -572,6 +571,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Options.UseCmp = Flags.use_cmp;
Options.UseValueProfile = Flags.use_value_profile;
Options.Shrink = Flags.shrink;
+ Options.ReduceInputs = Flags.reduce_inputs;
Options.ShuffleAtStartUp = Flags.shuffle;
Options.PreferSmall = Flags.prefer_small;
Options.ReloadIntervalSec = Flags.reload;
@@ -657,7 +657,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
size_t Size = SMR.ReadByteArraySize();
SMR.WriteByteArray(nullptr, 0);
const Unit tmp(SMR.GetByteArray(), SMR.GetByteArray() + Size);
- F->RunOne(tmp.data(), tmp.size());
+ F->ExecuteCallback(tmp.data(), tmp.size());
SMR.PostServer();
}
return 0;
diff --git a/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp b/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp
index 7b02b6f0b701b..503f0395cf8f8 100644
--- a/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp
+++ b/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp
@@ -41,7 +41,8 @@ namespace fuzzer {
ExternalFunctions::ExternalFunctions() {
#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \
this->NAME = ::NAME; \
- CheckFnPtr((void *)::NAME, #NAME, WARN);
+ CheckFnPtr(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(::NAME)), \
+ #NAME, WARN);
#include "FuzzerExtFunctions.def"
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 7ff196c8fa960..5e70cbad3cf1f 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -65,7 +65,9 @@ FUZZER_FLAG_INT(use_memmem, 1,
FUZZER_FLAG_INT(use_value_profile, 0,
"Experimental. Use value profile to guide fuzzing.")
FUZZER_FLAG_INT(use_cmp, 1, "Use CMP traces to guide mutations")
-FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus elements.")
+FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus inputs.")
+FUZZER_FLAG_INT(reduce_inputs, 0, "Experimental. "
+ "Try to reduce the size of inputs while preserving their full feature sets")
FUZZER_FLAG_UNSIGNED(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn"
" this number of jobs in separate worker processes"
" with stdout/stderr redirected to fuzz-JOB.log.")
diff --git a/lib/Fuzzer/FuzzerIOWindows.cpp b/lib/Fuzzer/FuzzerIOWindows.cpp
index 75d4e3a06071e..742520267b73f 100644
--- a/lib/Fuzzer/FuzzerIOWindows.cpp
+++ b/lib/Fuzzer/FuzzerIOWindows.cpp
@@ -182,7 +182,7 @@ static size_t ParseFileName(const std::string &FileName, const size_t Offset) {
return Pos - Offset;
}
-// Parse a directory ending in separator, like: SomeDir\
+// Parse a directory ending in separator, like: `SomeDir\`
// Returns number of characters considered if successful.
static size_t ParseDir(const std::string &FileName, const size_t Offset) {
size_t Pos = Offset;
@@ -197,7 +197,7 @@ static size_t ParseDir(const std::string &FileName, const size_t Offset) {
return Pos - Offset;
}
-// Parse a servername and share, like: SomeServer\SomeShare\
+// Parse a servername and share, like: `SomeServer\SomeShare\`
// Returns number of characters considered if successful.
static size_t ParseServerAndShare(const std::string &FileName,
const size_t Offset) {
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index 5f184c2316e2a..a732f895375ed 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -65,7 +65,8 @@ public:
static void StaticFileSizeExceedCallback();
void ExecuteCallback(const uint8_t *Data, size_t Size);
- size_t RunOne(const uint8_t *Data, size_t Size);
+ bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false,
+ InputInfo *II = nullptr);
// Merge Corpora[1:] into Corpora[0].
void Merge(const std::vector<std::string> &Corpora);
@@ -95,13 +96,12 @@ private:
void InterruptCallback();
void MutateAndTestOne();
void ReportNewCoverage(InputInfo *II, const Unit &U);
- size_t RunOne(const Unit &U) { return RunOne(U.data(), U.size()); }
+ void PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size);
void WriteToOutputCorpus(const Unit &U);
void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0);
void PrintStatusForNewUnit(const Unit &U);
void ShuffleCorpus(UnitVector *V);
- void AddToCorpus(const Unit &U);
void CheckExitOnSrcPosOrItem();
// Trace-based fuzzing: we run a unit with some kind of tracing
@@ -142,6 +142,8 @@ private:
size_t MaxInputLen = 0;
size_t MaxMutationLen = 0;
+ std::vector<uint32_t> FeatureSetTmp;
+
// Need to know our own thread.
static thread_local bool IsMyThread;
};
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index fbf18357ede65..6816f3af8a6f7 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -22,9 +22,6 @@
#include <set>
#if defined(__has_include)
-#if __has_include(<sanitizer / coverage_interface.h>)
-#include <sanitizer/coverage_interface.h>
-#endif
#if __has_include(<sanitizer / lsan_interface.h>)
#include <sanitizer/lsan_interface.h>
#endif
@@ -348,11 +345,8 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
if (U.size() > MaxSize)
U.resize(MaxSize);
if (!Corpus.HasUnit(U)) {
- if (size_t NumFeatures = RunOne(U)) {
- CheckExitOnSrcPosOrItem();
- Corpus.AddToCorpus(U, NumFeatures);
+ if (RunOne(U.data(), U.size()))
Reloaded = true;
- }
}
}
if (Reloaded)
@@ -377,10 +371,7 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
ExecuteCallback(&dummy, 0);
for (const auto &U : *InitialCorpus) {
- if (size_t NumFeatures = RunOne(U)) {
- CheckExitOnSrcPosOrItem();
- Corpus.AddToCorpus(U, NumFeatures);
- }
+ RunOne(U.data(), U.size());
TryDetectingAMemoryLeak(U.data(), U.size(),
/*DuringInitialCorpusExecution*/ true);
}
@@ -392,18 +383,7 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
}
}
-size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) {
- if (!Size) return 0;
- TotalNumberOfRuns++;
-
- ExecuteCallback(Data, Size);
-
- size_t NumUpdatesBefore = Corpus.NumFeatureUpdates();
- TPC.CollectFeatures([&](size_t Feature) {
- Corpus.AddFeature(Feature, Size, Options.Shrink);
- });
- size_t NumUpdatesAfter = Corpus.NumFeatureUpdates();
-
+void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) {
auto TimeOfUnit =
duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) &&
@@ -415,7 +395,34 @@ size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) {
Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds);
WriteUnitToFileWithPrefix({Data, Data + Size}, "slow-unit-");
}
- return NumUpdatesAfter - NumUpdatesBefore;
+}
+
+bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
+ InputInfo *II) {
+ if (!Size) return false;
+
+ ExecuteCallback(Data, Size);
+
+ FeatureSetTmp.clear();
+ size_t NumUpdatesBefore = Corpus.NumFeatureUpdates();
+ TPC.CollectFeatures([&](size_t Feature) {
+ Corpus.AddFeature(Feature, Size, Options.Shrink);
+ if (Options.ReduceInputs)
+ FeatureSetTmp.push_back(Feature);
+ });
+ PrintPulseAndReportSlowInput(Data, Size);
+ size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore;
+ if (NumNewFeatures) {
+ Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
+ FeatureSetTmp);
+ CheckExitOnSrcPosOrItem();
+ return true;
+ }
+ if (II && Corpus.TryToReplace(II, Data, Size, FeatureSetTmp)) {
+ CheckExitOnSrcPosOrItem();
+ return true;
+ }
+ return false;
}
size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const {
@@ -443,6 +450,7 @@ static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) {
}
void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) {
+ TotalNumberOfRuns++;
assert(InFuzzingThread());
if (SMR.IsClient())
SMR.WriteByteArray(Data, Size);
@@ -595,12 +603,9 @@ void Fuzzer::MutateAndTestOne() {
if (i == 0)
StartTraceRecording();
II.NumExecutedMutations++;
- if (size_t NumFeatures = RunOne(CurrentUnitData, Size)) {
- Corpus.AddToCorpus({CurrentUnitData, CurrentUnitData + Size}, NumFeatures,
- /*MayDeleteFile=*/true);
+ if (RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II))
ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size});
- CheckExitOnSrcPosOrItem();
- }
+
StopTraceRecording();
TryDetectingAMemoryLeak(CurrentUnitData, Size,
/*DuringInitialCorpusExecution*/ false);
@@ -638,7 +643,8 @@ void Fuzzer::MinimizeCrashLoop(const Unit &U) {
for (int i = 0; i < Options.MutateDepth; i++) {
size_t NewSize = MD.Mutate(CurrentUnitData, U.size(), MaxMutationLen);
assert(NewSize > 0 && NewSize <= MaxMutationLen);
- RunOne(CurrentUnitData, NewSize);
+ ExecuteCallback(CurrentUnitData, NewSize);
+ PrintPulseAndReportSlowInput(CurrentUnitData, NewSize);
TryDetectingAMemoryLeak(CurrentUnitData, NewSize,
/*DuringInitialCorpusExecution*/ false);
}
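
RunOne now makes a three-way decision per input: new features add it to the corpus, a matching-but-smaller candidate replaces an existing entry, and everything else is discarded; pulse and slow-unit reporting moved into PrintPulseAndReportSlowInput so MinimizeCrashLoop can call ExecuteCallback directly. A distilled sketch of the decision (illustrative names, not the libFuzzer API):

    #include <cstddef>

    enum class RunResult { Discarded, AddedToCorpus, ReplacedSmaller };

    // NumNewFeatures: features first observed during this run.
    // CanReplace: an existing entry has the same feature set but is larger.
    RunResult classifyRun(size_t NumNewFeatures, bool CanReplace) {
      if (NumNewFeatures)
        return RunResult::AddedToCorpus;   // genuinely new coverage
      if (CanReplace)
        return RunResult::ReplacedSmaller; // equal coverage, smaller input
      return RunResult::Discarded;
    }
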
diff --git a/lib/Fuzzer/FuzzerOptions.h b/lib/Fuzzer/FuzzerOptions.h
index b1366789be007..9500235e2b1f3 100644
--- a/lib/Fuzzer/FuzzerOptions.h
+++ b/lib/Fuzzer/FuzzerOptions.h
@@ -32,6 +32,7 @@ struct FuzzingOptions {
bool UseCmp = false;
bool UseValueProfile = false;
bool Shrink = false;
+ bool ReduceInputs = false;
int ReloadIntervalSec = 1;
bool ShuffleAtStartUp = true;
bool PreferSmall = true;
diff --git a/lib/Fuzzer/FuzzerUtilDarwin.cpp b/lib/Fuzzer/FuzzerUtilDarwin.cpp
index 9674368c355ee..2df4872a92069 100644
--- a/lib/Fuzzer/FuzzerUtilDarwin.cpp
+++ b/lib/Fuzzer/FuzzerUtilDarwin.cpp
@@ -15,6 +15,8 @@
#include <mutex>
#include <signal.h>
#include <spawn.h>
+#include <stdlib.h>
+#include <string.h>
#include <sys/wait.h>
// There is no header for this on macOS, so declare it here
@@ -97,11 +99,16 @@ int ExecuteCommand(const std::string &Command) {
pid_t Pid;
char **Environ = environ; // Read from global
const char *CommandCStr = Command.c_str();
- const char *Argv[] = {"sh", "-c", CommandCStr, NULL};
+ char *const Argv[] = {
+ strdup("sh"),
+ strdup("-c"),
+ strdup(CommandCStr),
+ NULL
+ };
int ErrorCode = 0, ProcessStatus = 0;
// FIXME: We probably shouldn't hardcode the shell path.
ErrorCode = posix_spawn(&Pid, "/bin/sh", NULL, &SpawnAttributes,
- (char *const *)Argv, Environ);
+ Argv, Environ);
(void)posix_spawnattr_destroy(&SpawnAttributes);
if (!ErrorCode) {
pid_t SavedPid = Pid;
@@ -120,6 +127,8 @@ int ExecuteCommand(const std::string &Command) {
// Shell execution failure.
ProcessStatus = W_EXITCODE(127, 0);
}
+ for (unsigned i = 0, n = sizeof(Argv) / sizeof(Argv[0]); i < n; ++i)
+ free(Argv[i]);
// Restore the signal handlers of the current process when the last thread
// using this function finishes.
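
posix_spawn is declared to take char *const argv[], so passing string literals previously required casting away const; duplicating the arguments with strdup (and freeing them after the spawn) keeps the types honest. The same pattern in isolation:

    #include <cstdlib>
    #include <cstring>

    int main() {
      // Mutable argv suitable for APIs declared as char *const argv[].
      char *Argv[] = {strdup("sh"), strdup("-c"), strdup("echo hi"), nullptr};
      // ... hand Argv to posix_spawn(&Pid, "/bin/sh", ..., Argv, environ) ...
      for (unsigned I = 0; Argv[I]; ++I)
        free(Argv[I]);
      return 0;
    }
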
diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt
index 1cf6c9502a2b5..30566bdc87aed 100644
--- a/lib/Fuzzer/test/CMakeLists.txt
+++ b/lib/Fuzzer/test/CMakeLists.txt
@@ -118,6 +118,7 @@ set(Tests
SingleStrncmpTest
SpamyTest
ShrinkControlFlowTest
+ ShrinkControlFlowSimpleTest
ShrinkValueProfileTest
StrcmpTest
StrncmpOOBTest
@@ -271,5 +272,5 @@ add_lit_testsuite(check-fuzzer "Running Fuzzer tests"
# Don't add dependencies on Windows. The linker step would fail on Windows,
# since cmake will use link.exe for linking and won't include compiler-rt libs.
if(NOT MSVC)
- add_dependencies(check-fuzzer FileCheck sancov not)
+ add_dependencies(check-fuzzer FileCheck sancov not llvm-symbolizer)
endif()
diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp
index 812894fd947f9..1053c28527bfd 100644
--- a/lib/Fuzzer/test/FuzzerUnittest.cpp
+++ b/lib/Fuzzer/test/FuzzerUnittest.cpp
@@ -5,6 +5,9 @@
// with ASan) involving C++ standard library types when using libcxx.
#define _LIBCPP_HAS_NO_ASAN
+// Do not attempt to use LLVM ostream from gtest.
+#define GTEST_NO_LLVM_RAW_OSTREAM 1
+
#include "FuzzerCorpus.h"
#include "FuzzerDictionary.h"
#include "FuzzerInternal.h"
@@ -590,7 +593,7 @@ TEST(Corpus, Distribution) {
size_t N = 10;
size_t TriesPerUnit = 1<<16;
for (size_t i = 0; i < N; i++)
- C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 0);
+ C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 0, false, {});
std::vector<size_t> Hist(N);
for (size_t i = 0; i < N * TriesPerUnit; i++) {
diff --git a/lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp b/lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp
new file mode 100644
index 0000000000000..0afd26df23a0f
--- /dev/null
+++ b/lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp
@@ -0,0 +1,19 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that we can find the minimal item in the corpus (3 bytes: "FUZ").
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ if (Size < 2) return 0;
+ if (Data[0] == 'F' && Data[Size / 2] == 'U' && Data[Size - 1] == 'Z')
+ Sink++;
+ return 0;
+}
+
diff --git a/lib/Fuzzer/test/reduce_inputs.test b/lib/Fuzzer/test/reduce_inputs.test
new file mode 100644
index 0000000000000..a4a5c57123d3f
--- /dev/null
+++ b/lib/Fuzzer/test/reduce_inputs.test
@@ -0,0 +1,13 @@
+# Test -reduce_inputs=1
+
+RUN: rm -rf %t/C
+RUN: mkdir -p %t/C
+RUN: LLVMFuzzer-ShrinkControlFlowSimpleTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -reduce_inputs=1 -runs=1000000 %t/C 2>&1 | FileCheck %s
+CHECK: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60'
+
+# Test that reduce_inputs deletes redundant files in the corpus.
+RUN: LLVMFuzzer-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT
+COUNT: READ units: 3
+
+
+
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index c7f112887a306..80371780fb6d9 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -2119,6 +2119,8 @@ class AssemblyWriter {
bool ShouldPreserveUseListOrder;
UseListOrderStack UseListOrders;
SmallVector<StringRef, 8> MDNames;
+ /// Synchronization scope names registered with LLVMContext.
+ SmallVector<StringRef, 8> SSNs;
public:
/// Construct an AssemblyWriter with an external SlotTracker
@@ -2134,10 +2136,15 @@ public:
void writeOperand(const Value *Op, bool PrintType);
void writeParamOperand(const Value *Operand, AttributeSet Attrs);
void writeOperandBundles(ImmutableCallSite CS);
- void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
- void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
+ void writeSyncScope(const LLVMContext &Context,
+ SyncScope::ID SSID);
+ void writeAtomic(const LLVMContext &Context,
+ AtomicOrdering Ordering,
+ SyncScope::ID SSID);
+ void writeAtomicCmpXchg(const LLVMContext &Context,
+ AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope);
+ SyncScope::ID SSID);
void writeAllMDNodes();
void writeMDNode(unsigned Slot, const MDNode *Node);
@@ -2199,30 +2206,42 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
}
-void AssemblyWriter::writeAtomic(AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
- if (Ordering == AtomicOrdering::NotAtomic)
- return;
+void AssemblyWriter::writeSyncScope(const LLVMContext &Context,
+ SyncScope::ID SSID) {
+ switch (SSID) {
+ case SyncScope::System: {
+ break;
+ }
+ default: {
+ if (SSNs.empty())
+ Context.getSyncScopeNames(SSNs);
- switch (SynchScope) {
- case SingleThread: Out << " singlethread"; break;
- case CrossThread: break;
+ Out << " syncscope(\"";
+ PrintEscapedString(SSNs[SSID], Out);
+ Out << "\")";
+ break;
+ }
}
+}
+
+void AssemblyWriter::writeAtomic(const LLVMContext &Context,
+ AtomicOrdering Ordering,
+ SyncScope::ID SSID) {
+ if (Ordering == AtomicOrdering::NotAtomic)
+ return;
+ writeSyncScope(Context, SSID);
Out << " " << toIRString(Ordering);
}
-void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
+void AssemblyWriter::writeAtomicCmpXchg(const LLVMContext &Context,
+ AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
+ SyncScope::ID SSID) {
assert(SuccessOrdering != AtomicOrdering::NotAtomic &&
FailureOrdering != AtomicOrdering::NotAtomic);
- switch (SynchScope) {
- case SingleThread: Out << " singlethread"; break;
- case CrossThread: break;
- }
-
+ writeSyncScope(Context, SSID);
Out << " " << toIRString(SuccessOrdering);
Out << " " << toIRString(FailureOrdering);
}
@@ -3215,21 +3234,22 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// Print atomic ordering/alignment for memory operations
if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (LI->isAtomic())
- writeAtomic(LI->getOrdering(), LI->getSynchScope());
+ writeAtomic(LI->getContext(), LI->getOrdering(), LI->getSyncScopeID());
if (LI->getAlignment())
Out << ", align " << LI->getAlignment();
} else if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (SI->isAtomic())
- writeAtomic(SI->getOrdering(), SI->getSynchScope());
+ writeAtomic(SI->getContext(), SI->getOrdering(), SI->getSyncScopeID());
if (SI->getAlignment())
Out << ", align " << SI->getAlignment();
} else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) {
- writeAtomicCmpXchg(CXI->getSuccessOrdering(), CXI->getFailureOrdering(),
- CXI->getSynchScope());
+ writeAtomicCmpXchg(CXI->getContext(), CXI->getSuccessOrdering(),
+ CXI->getFailureOrdering(), CXI->getSyncScopeID());
} else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) {
- writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope());
+ writeAtomic(RMWI->getContext(), RMWI->getOrdering(),
+ RMWI->getSyncScopeID());
} else if (const FenceInst *FI = dyn_cast<FenceInst>(&I)) {
- writeAtomic(FI->getOrdering(), FI->getSynchScope());
+ writeAtomic(FI->getContext(), FI->getOrdering(), FI->getSyncScopeID());
}
// Print Metadata info.
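
The printer now treats SyncScope::System as the default (printed as nothing) and renders every other ID as syncscope("<name>") using the names registered with the LLVMContext. A sketch of producing such instructions, assuming the LLVMContext/IRBuilder half of this patch set (getOrInsertSyncScopeID):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    void emitFences(IRBuilder<> &B, LLVMContext &Ctx) {
      // Prints as: fence seq_cst
      B.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::System);
      // Prints as: fence syncscope("singlethread") seq_cst
      B.CreateFence(AtomicOrdering::SequentiallyConsistent,
                    SyncScope::SingleThread);
      // A target-specific scope; prints as: fence syncscope("agent") seq_cst
      B.CreateFence(AtomicOrdering::SequentiallyConsistent,
                    Ctx.getOrInsertSyncScopeID("agent"));
    }
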
diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt
index 11259cbe18152..1cc229d68bfce 100644
--- a/lib/IR/CMakeLists.txt
+++ b/lib/IR/CMakeLists.txt
@@ -43,6 +43,7 @@ add_llvm_library(LLVMCore
Pass.cpp
PassManager.cpp
PassRegistry.cpp
+ SafepointIRVerifier.cpp
ProfileSummary.cpp
Statepoint.cpp
Type.cpp
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 3469026ad7ed6..23ccd8d4cf424 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -242,7 +242,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
// X | -1 -> -1.
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS))
- if (RHSC->isAllOnesValue())
+ if (RHSC->isMinusOne())
return RHSC;
Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
@@ -1015,33 +1015,33 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
switch (Opcode) {
case Instruction::Add:
- if (CI2->equalsInt(0)) return C1; // X + 0 == X
+ if (CI2->isZero()) return C1; // X + 0 == X
break;
case Instruction::Sub:
- if (CI2->equalsInt(0)) return C1; // X - 0 == X
+ if (CI2->isZero()) return C1; // X - 0 == X
break;
case Instruction::Mul:
- if (CI2->equalsInt(0)) return C2; // X * 0 == 0
- if (CI2->equalsInt(1))
+ if (CI2->isZero()) return C2; // X * 0 == 0
+ if (CI2->isOne())
return C1; // X * 1 == X
break;
case Instruction::UDiv:
case Instruction::SDiv:
- if (CI2->equalsInt(1))
+ if (CI2->isOne())
return C1; // X / 1 == X
- if (CI2->equalsInt(0))
+ if (CI2->isZero())
return UndefValue::get(CI2->getType()); // X / 0 == undef
break;
case Instruction::URem:
case Instruction::SRem:
- if (CI2->equalsInt(1))
+ if (CI2->isOne())
return Constant::getNullValue(CI2->getType()); // X % 1 == 0
- if (CI2->equalsInt(0))
+ if (CI2->isZero())
return UndefValue::get(CI2->getType()); // X % 0 == undef
break;
case Instruction::And:
if (CI2->isZero()) return C2; // X & 0 == 0
- if (CI2->isAllOnesValue())
+ if (CI2->isMinusOne())
return C1; // X & -1 == X
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
@@ -1078,12 +1078,12 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
break;
case Instruction::Or:
- if (CI2->equalsInt(0)) return C1; // X | 0 == X
- if (CI2->isAllOnesValue())
+ if (CI2->isZero()) return C1; // X | 0 == X
+ if (CI2->isMinusOne())
return C2; // X | -1 == -1
break;
case Instruction::Xor:
- if (CI2->equalsInt(0)) return C1; // X ^ 0 == X
+ if (CI2->isZero()) return C1; // X ^ 0 == X
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
switch (CE1->getOpcode()) {
@@ -1091,7 +1091,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
case Instruction::ICmp:
case Instruction::FCmp:
// cmp pred ^ true -> cmp !pred
- assert(CI2->equalsInt(1));
+ assert(CI2->isOne());
CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate();
pred = CmpInst::getInversePredicate(pred);
return ConstantExpr::getCompare(pred, CE1->getOperand(0),
@@ -1126,18 +1126,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
case Instruction::Mul:
return ConstantInt::get(CI1->getContext(), C1V * C2V);
case Instruction::UDiv:
- assert(!CI2->isNullValue() && "Div by zero handled above");
+ assert(!CI2->isZero() && "Div by zero handled above");
return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V));
case Instruction::SDiv:
- assert(!CI2->isNullValue() && "Div by zero handled above");
+ assert(!CI2->isZero() && "Div by zero handled above");
if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef
return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
case Instruction::URem:
- assert(!CI2->isNullValue() && "Div by zero handled above");
+ assert(!CI2->isZero() && "Div by zero handled above");
return ConstantInt::get(CI1->getContext(), C1V.urem(C2V));
case Instruction::SRem:
- assert(!CI2->isNullValue() && "Div by zero handled above");
+ assert(!CI2->isZero() && "Div by zero handled above");
if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef
return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
@@ -1170,7 +1170,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
case Instruction::LShr:
case Instruction::AShr:
case Instruction::Shl:
- if (CI1->equalsInt(0)) return C1;
+ if (CI1->isZero()) return C1;
break;
default:
break;
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index d387a6f0ecb9a..e31779c83e3a3 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -512,7 +512,7 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
}
Constant *ConstantInt::getTrue(Type *Ty) {
- assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1.");
+ assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1.");
ConstantInt *TrueC = ConstantInt::getTrue(Ty->getContext());
if (auto *VTy = dyn_cast<VectorType>(Ty))
return ConstantVector::getSplat(VTy->getNumElements(), TrueC);
@@ -520,7 +520,7 @@ Constant *ConstantInt::getTrue(Type *Ty) {
}
Constant *ConstantInt::getFalse(Type *Ty) {
- assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1.");
+ assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1.");
ConstantInt *FalseC = ConstantInt::getFalse(Ty->getContext());
if (auto *VTy = dyn_cast<VectorType>(Ty))
return ConstantVector::getSplat(VTy->getNumElements(), FalseC);
@@ -1635,9 +1635,9 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty, bool OnlyIfReduced) {
Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
- assert(C->getType()->getScalarType()->isPointerTy() &&
+ assert(C->getType()->isPtrOrPtrVectorTy() &&
"PtrToInt source must be pointer or pointer vector");
- assert(DstTy->getScalarType()->isIntegerTy() &&
+ assert(DstTy->isIntOrIntVectorTy() &&
"PtrToInt destination must be integer or integer vector");
assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
if (isa<VectorType>(C->getType()))
@@ -1648,9 +1648,9 @@ Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy,
Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
- assert(C->getType()->getScalarType()->isIntegerTy() &&
+ assert(C->getType()->isIntOrIntVectorTy() &&
"IntToPtr source must be integer or integer vector");
- assert(DstTy->getScalarType()->isPointerTy() &&
+ assert(DstTy->isPtrOrPtrVectorTy() &&
"IntToPtr destination must be a pointer or pointer vector");
assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
if (isa<VectorType>(C->getType()))
@@ -1914,8 +1914,8 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS,
Constant *RHS, bool OnlyIfReduced) {
assert(LHS->getType() == RHS->getType());
- assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
- pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate");
+ assert(CmpInst::isIntPredicate((CmpInst::Predicate)pred) &&
+ "Invalid ICmp Predicate");
if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
return FC; // Fold a few common cases...
@@ -1939,7 +1939,8 @@ Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS,
Constant *ConstantExpr::getFCmp(unsigned short pred, Constant *LHS,
Constant *RHS, bool OnlyIfReduced) {
assert(LHS->getType() == RHS->getType());
- assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate");
+ assert(CmpInst::isFPPredicate((CmpInst::Predicate)pred) &&
+ "Invalid FCmp Predicate");
if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
return FC; // Fold a few common cases...
@@ -2379,32 +2380,32 @@ void ConstantDataSequential::destroyConstantImpl() {
Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint8_t> Elts) {
Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 1), Ty);
}
Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 2), Ty);
}
Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 4), Ty);
}
Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 8), Ty);
}
Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<float> Elts) {
Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 4), Ty);
}
Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) {
Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 8), Ty);
}
/// getFP() constructors - Return a constant with array type with an element
@@ -2416,27 +2417,26 @@ Constant *ConstantDataArray::getFP(LLVMContext &Context,
ArrayRef<uint16_t> Elts) {
Type *Ty = ArrayType::get(Type::getHalfTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 2), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 2), Ty);
}
Constant *ConstantDataArray::getFP(LLVMContext &Context,
ArrayRef<uint32_t> Elts) {
Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 4), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 4), Ty);
}
Constant *ConstantDataArray::getFP(LLVMContext &Context,
ArrayRef<uint64_t> Elts) {
Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 8), Ty);
}
Constant *ConstantDataArray::getString(LLVMContext &Context,
StringRef Str, bool AddNull) {
if (!AddNull) {
const uint8_t *Data = reinterpret_cast<const uint8_t *>(Str.data());
- return get(Context, makeArrayRef(const_cast<uint8_t *>(Data),
- Str.size()));
+ return get(Context, makeArrayRef(Data, Str.size()));
}
SmallVector<uint8_t, 64> ElementVals;
@@ -2451,32 +2451,32 @@ Constant *ConstantDataArray::getString(LLVMContext &Context,
Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint8_t> Elts){
Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 1), Ty);
}
Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 2), Ty);
}
Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 4), Ty);
}
Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 8), Ty);
}
Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) {
Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 4), Ty);
}
Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) {
Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 8), Ty);
}
/// getFP() constructors - Return a constant with vector type with an element
@@ -2488,19 +2488,19 @@ Constant *ConstantDataVector::getFP(LLVMContext &Context,
ArrayRef<uint16_t> Elts) {
Type *Ty = VectorType::get(Type::getHalfTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 2), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 2), Ty);
}
Constant *ConstantDataVector::getFP(LLVMContext &Context,
ArrayRef<uint32_t> Elts) {
Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 4), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 4), Ty);
}
Constant *ConstantDataVector::getFP(LLVMContext &Context,
ArrayRef<uint64_t> Elts) {
Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+ return getImpl(StringRef(Data, Elts.size() * 8), Ty);
}
Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
@@ -2555,13 +2555,13 @@ uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const {
switch (getElementType()->getIntegerBitWidth()) {
default: llvm_unreachable("Invalid bitwidth for CDS");
case 8:
- return *const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(EltPtr));
+ return *reinterpret_cast<const uint8_t *>(EltPtr);
case 16:
- return *const_cast<uint16_t *>(reinterpret_cast<const uint16_t *>(EltPtr));
+ return *reinterpret_cast<const uint16_t *>(EltPtr);
case 32:
- return *const_cast<uint32_t *>(reinterpret_cast<const uint32_t *>(EltPtr));
+ return *reinterpret_cast<const uint32_t *>(EltPtr);
case 64:
- return *const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(EltPtr));
+ return *reinterpret_cast<const uint64_t *>(EltPtr);
}
}
@@ -2589,16 +2589,13 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
float ConstantDataSequential::getElementAsFloat(unsigned Elt) const {
assert(getElementType()->isFloatTy() &&
"Accessor can only be used when element is a 'float'");
- const float *EltPtr = reinterpret_cast<const float *>(getElementPointer(Elt));
- return *const_cast<float *>(EltPtr);
+ return *reinterpret_cast<const float *>(getElementPointer(Elt));
}
double ConstantDataSequential::getElementAsDouble(unsigned Elt) const {
assert(getElementType()->isDoubleTy() &&
"Accessor can only be used when element is a 'float'");
- const double *EltPtr =
- reinterpret_cast<const double *>(getElementPointer(Elt));
- return *const_cast<double *>(EltPtr);
+ return *reinterpret_cast<const double *>(getElementPointer(Elt));
}
Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const {
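
The whole Constants.cpp block above is one mechanical cleanup: StringRef's constructor takes a const char *, so the const_cast in every getImpl() call was dead weight. A minimal sketch of the underlying pattern, with a hypothetical helper name that is not part of the patch:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/StringRef.h"
    #include <cstdint>

    // Sketch: view an ArrayRef's elements as the raw byte string that
    // getImpl() expects. No const_cast is needed because StringRef never
    // writes through the pointer.
    static llvm::StringRef asRawBytes(llvm::ArrayRef<uint32_t> Elts) {
      const char *Data = reinterpret_cast<const char *>(Elts.data());
      return llvm::StringRef(Data, Elts.size() * sizeof(uint32_t));
    }
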
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 4ff0261a7f08f..2165ae5a94702 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -50,6 +50,7 @@ void llvm::initializeCore(PassRegistry &Registry) {
initializePrintModulePassWrapperPass(Registry);
initializePrintFunctionPassWrapperPass(Registry);
initializePrintBasicBlockPassPass(Registry);
+ initializeSafepointIRVerifierPass(Registry);
initializeVerifierLegacyPassPass(Registry);
}
@@ -2755,11 +2756,14 @@ static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) {
llvm_unreachable("Invalid AtomicOrdering value!");
}
+// TODO: Should this and other atomic instructions support building with
+// "syncscope"?
LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering,
LLVMBool isSingleThread, const char *Name) {
return wrap(
unwrap(B)->CreateFence(mapFromLLVMOrdering(Ordering),
- isSingleThread ? SingleThread : CrossThread,
+ isSingleThread ? SyncScope::SingleThread
+ : SyncScope::System,
Name));
}
@@ -3041,7 +3045,8 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break;
}
return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val),
- mapFromLLVMOrdering(ordering), singleThread ? SingleThread : CrossThread));
+ mapFromLLVMOrdering(ordering), singleThread ? SyncScope::SingleThread
+ : SyncScope::System));
}
LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr,
@@ -3053,7 +3058,7 @@ LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr,
return wrap(unwrap(B)->CreateAtomicCmpXchg(unwrap(Ptr), unwrap(Cmp),
unwrap(New), mapFromLLVMOrdering(SuccessOrdering),
mapFromLLVMOrdering(FailureOrdering),
- singleThread ? SingleThread : CrossThread));
+ singleThread ? SyncScope::SingleThread : SyncScope::System));
}
@@ -3061,17 +3066,18 @@ LLVMBool LLVMIsAtomicSingleThread(LLVMValueRef AtomicInst) {
Value *P = unwrap<Value>(AtomicInst);
if (AtomicRMWInst *I = dyn_cast<AtomicRMWInst>(P))
- return I->getSynchScope() == SingleThread;
- return cast<AtomicCmpXchgInst>(P)->getSynchScope() == SingleThread;
+ return I->getSyncScopeID() == SyncScope::SingleThread;
+ return cast<AtomicCmpXchgInst>(P)->getSyncScopeID() ==
+ SyncScope::SingleThread;
}
void LLVMSetAtomicSingleThread(LLVMValueRef AtomicInst, LLVMBool NewValue) {
Value *P = unwrap<Value>(AtomicInst);
- SynchronizationScope Sync = NewValue ? SingleThread : CrossThread;
+ SyncScope::ID SSID = NewValue ? SyncScope::SingleThread : SyncScope::System;
if (AtomicRMWInst *I = dyn_cast<AtomicRMWInst>(P))
- return I->setSynchScope(Sync);
- return cast<AtomicCmpXchgInst>(P)->setSynchScope(Sync);
+ return I->setSyncScopeID(SSID);
+ return cast<AtomicCmpXchgInst>(P)->setSyncScopeID(SSID);
}
LLVMAtomicOrdering LLVMGetCmpXchgSuccessOrdering(LLVMValueRef CmpXchgInst) {
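
For C API clients nothing changes observably: the boolean singlethread flag is now mapped onto SyncScope::SingleThread or SyncScope::System instead of the removed SynchronizationScope enum. A hedged usage sketch, assuming B is a builder already positioned inside a function:

    #include "llvm-c/Core.h"

    /* Sketch: emit a single-threaded acquire-release fence; internally this
       now becomes SyncScope::SingleThread rather than the old SingleThread
       enumerator. */
    static void emitFence(LLVMBuilderRef B) {
      LLVMBuildFence(B, LLVMAtomicOrderingAcquireRelease,
                     /*isSingleThread=*/1, "");
    }
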
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index 3dd653d2d0473..365cb019aec43 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -362,13 +362,13 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
(LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() ||
IgnoreAlignment) &&
LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
- LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope();
+ LI->getSyncScopeID() == cast<LoadInst>(I2)->getSyncScopeID();
if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
(SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() ||
IgnoreAlignment) &&
SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
- SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope();
+ SI->getSyncScopeID() == cast<StoreInst>(I2)->getSyncScopeID();
if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
if (const CallInst *CI = dyn_cast<CallInst>(I1))
@@ -386,7 +386,7 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices();
if (const FenceInst *FI = dyn_cast<FenceInst>(I1))
return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() &&
- FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
+ FI->getSyncScopeID() == cast<FenceInst>(I2)->getSyncScopeID();
if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
CXI->isWeak() == cast<AtomicCmpXchgInst>(I2)->isWeak() &&
@@ -394,12 +394,13 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() &&
CXI->getFailureOrdering() ==
cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() &&
- CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
+ CXI->getSyncScopeID() ==
+ cast<AtomicCmpXchgInst>(I2)->getSyncScopeID();
if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() &&
RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() &&
- RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope();
+ RMWI->getSyncScopeID() == cast<AtomicRMWInst>(I2)->getSyncScopeID();
return true;
}
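
haveSameSpecialState() feeds Instruction::isIdenticalTo() and its relatives, so after this rename two atomic operations only compare equal when their sync scope IDs agree. A small sketch of the observable behavior, assuming LA and LB match in every other respect:

    #include "llvm/IR/Instructions.h"

    // Sketch: loads that differ only in synchronization scope are no longer
    // considered identical, since getSyncScopeID() is now part of the
    // special-state comparison above.
    static bool loadsIdentical(const llvm::LoadInst *LA,
                               const llvm::LoadInst *LB) {
      return LA->isIdenticalTo(LB);
    }
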
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index a79b00be4ffe8..2c49564e328bd 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1304,34 +1304,34 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, Instruction *InsertBef)
: LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
- CrossThread, InsertBef) {}
+ SyncScope::System, InsertBef) {}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, BasicBlock *InsertAE)
: LoadInst(Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
- CrossThread, InsertAE) {}
+ SyncScope::System, InsertAE) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope, Instruction *InsertBef)
+ SyncScope::ID SSID, Instruction *InsertBef)
: UnaryInstruction(Ty, Load, Ptr, InsertBef) {
assert(Ty == cast<PointerType>(Ptr->getType())->getElementType());
setVolatile(isVolatile);
setAlignment(Align);
- setAtomic(Order, SynchScope);
+ setAtomic(Order, SSID);
AssertOK();
setName(Name);
}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
BasicBlock *InsertAE)
: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
Load, Ptr, InsertAE) {
setVolatile(isVolatile);
setAlignment(Align);
- setAtomic(Order, SynchScope);
+ setAtomic(Order, SSID);
AssertOK();
setName(Name);
}
@@ -1419,16 +1419,16 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align,
Instruction *InsertBefore)
: StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic,
- CrossThread, InsertBefore) {}
+ SyncScope::System, InsertBefore) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align,
BasicBlock *InsertAtEnd)
: StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic,
- CrossThread, InsertAtEnd) {}
+ SyncScope::System, InsertAtEnd) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
Instruction *InsertBefore)
: Instruction(Type::getVoidTy(val->getContext()), Store,
OperandTraits<StoreInst>::op_begin(this),
@@ -1438,13 +1438,13 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Op<1>() = addr;
setVolatile(isVolatile);
setAlignment(Align);
- setAtomic(Order, SynchScope);
+ setAtomic(Order, SSID);
AssertOK();
}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
BasicBlock *InsertAtEnd)
: Instruction(Type::getVoidTy(val->getContext()), Store,
OperandTraits<StoreInst>::op_begin(this),
@@ -1454,7 +1454,7 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Op<1>() = addr;
setVolatile(isVolatile);
setAlignment(Align);
- setAtomic(Order, SynchScope);
+ setAtomic(Order, SSID);
AssertOK();
}
@@ -1474,13 +1474,13 @@ void StoreInst::setAlignment(unsigned Align) {
void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal,
AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
+ SyncScope::ID SSID) {
Op<0>() = Ptr;
Op<1>() = Cmp;
Op<2>() = NewVal;
setSuccessOrdering(SuccessOrdering);
setFailureOrdering(FailureOrdering);
- setSynchScope(SynchScope);
+ setSyncScopeID(SSID);
assert(getOperand(0) && getOperand(1) && getOperand(2) &&
"All operands must be non-null!");
@@ -1507,25 +1507,25 @@ void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal,
AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
Instruction *InsertBefore)
: Instruction(
StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())),
AtomicCmpXchg, OperandTraits<AtomicCmpXchgInst>::op_begin(this),
OperandTraits<AtomicCmpXchgInst>::operands(this), InsertBefore) {
- Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope);
+ Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SSID);
}
AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
BasicBlock *InsertAtEnd)
: Instruction(
StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())),
AtomicCmpXchg, OperandTraits<AtomicCmpXchgInst>::op_begin(this),
OperandTraits<AtomicCmpXchgInst>::operands(this), InsertAtEnd) {
- Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope);
+ Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SSID);
}
//===----------------------------------------------------------------------===//
@@ -1534,12 +1534,12 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val,
AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
+ SyncScope::ID SSID) {
Op<0>() = Ptr;
Op<1>() = Val;
setOperation(Operation);
setOrdering(Ordering);
- setSynchScope(SynchScope);
+ setSyncScopeID(SSID);
assert(getOperand(0) && getOperand(1) &&
"All operands must be non-null!");
@@ -1554,24 +1554,24 @@ void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val,
AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
Instruction *InsertBefore)
: Instruction(Val->getType(), AtomicRMW,
OperandTraits<AtomicRMWInst>::op_begin(this),
OperandTraits<AtomicRMWInst>::operands(this),
InsertBefore) {
- Init(Operation, Ptr, Val, Ordering, SynchScope);
+ Init(Operation, Ptr, Val, Ordering, SSID);
}
AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
BasicBlock *InsertAtEnd)
: Instruction(Val->getType(), AtomicRMW,
OperandTraits<AtomicRMWInst>::op_begin(this),
OperandTraits<AtomicRMWInst>::operands(this),
InsertAtEnd) {
- Init(Operation, Ptr, Val, Ordering, SynchScope);
+ Init(Operation, Ptr, Val, Ordering, SSID);
}
//===----------------------------------------------------------------------===//
@@ -1579,19 +1579,19 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
//===----------------------------------------------------------------------===//
FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
Instruction *InsertBefore)
: Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) {
setOrdering(Ordering);
- setSynchScope(SynchScope);
+ setSyncScopeID(SSID);
}
FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
BasicBlock *InsertAtEnd)
: Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertAtEnd) {
setOrdering(Ordering);
- setSynchScope(SynchScope);
+ setSyncScopeID(SSID);
}
//===----------------------------------------------------------------------===//
@@ -3064,16 +3064,14 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
return false;
- return SrcTy->getScalarType()->isPointerTy() &&
- DstTy->getScalarType()->isIntegerTy();
+ return SrcTy->isPtrOrPtrVectorTy() && DstTy->isIntOrIntVectorTy();
case Instruction::IntToPtr:
if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
return false;
if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
return false;
- return SrcTy->getScalarType()->isIntegerTy() &&
- DstTy->getScalarType()->isPointerTy();
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isPtrOrPtrVectorTy();
case Instruction::BitCast: {
PointerType *SrcPtrTy = dyn_cast<PointerType>(SrcTy->getScalarType());
PointerType *DstPtrTy = dyn_cast<PointerType>(DstTy->getScalarType());
@@ -3797,12 +3795,12 @@ AllocaInst *AllocaInst::cloneImpl() const {
LoadInst *LoadInst::cloneImpl() const {
return new LoadInst(getOperand(0), Twine(), isVolatile(),
- getAlignment(), getOrdering(), getSynchScope());
+ getAlignment(), getOrdering(), getSyncScopeID());
}
StoreInst *StoreInst::cloneImpl() const {
return new StoreInst(getOperand(0), getOperand(1), isVolatile(),
- getAlignment(), getOrdering(), getSynchScope());
+ getAlignment(), getOrdering(), getSyncScopeID());
}
@@ -3810,7 +3808,7 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const {
AtomicCmpXchgInst *Result =
new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2),
getSuccessOrdering(), getFailureOrdering(),
- getSynchScope());
+ getSyncScopeID());
Result->setVolatile(isVolatile());
Result->setWeak(isWeak());
return Result;
@@ -3818,14 +3816,14 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const {
AtomicRMWInst *AtomicRMWInst::cloneImpl() const {
AtomicRMWInst *Result =
- new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1),
- getOrdering(), getSynchScope());
+ new AtomicRMWInst(getOperation(), getOperand(0), getOperand(1),
+ getOrdering(), getSyncScopeID());
Result->setVolatile(isVolatile());
return Result;
}
FenceInst *FenceInst::cloneImpl() const {
- return new FenceInst(getContext(), getOrdering(), getSynchScope());
+ return new FenceInst(getContext(), getOrdering(), getSyncScopeID());
}
TruncInst *TruncInst::cloneImpl() const {
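
Every constructor in this file now threads a SyncScope::ID through instead of the two-value SynchronizationScope enum. A minimal construction sketch under the new signatures, with the insertion point assumed valid:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"

    // Sketch: build a sequentially consistent fence in the default (system)
    // scope, inserted before an existing instruction I.
    static llvm::FenceInst *makeSystemFence(llvm::LLVMContext &Ctx,
                                            llvm::Instruction *I) {
      return new llvm::FenceInst(
          Ctx, llvm::AtomicOrdering::SequentiallyConsistent,
          llvm::SyncScope::System, I);
    }
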
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index 2e13f362344d8..c58459d6d5f5e 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -81,6 +81,18 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
assert(GCTransitionEntry->second == LLVMContext::OB_gc_transition &&
"gc-transition operand bundle id drifted!");
(void)GCTransitionEntry;
+
+ SyncScope::ID SingleThreadSSID =
+ pImpl->getOrInsertSyncScopeID("singlethread");
+ assert(SingleThreadSSID == SyncScope::SingleThread &&
+ "singlethread synchronization scope ID drifted!");
+ (void)SingleThreadSSID;
+
+ SyncScope::ID SystemSSID =
+ pImpl->getOrInsertSyncScopeID("");
+ assert(SystemSSID == SyncScope::System &&
+ "system synchronization scope ID drifted!");
+ (void)SystemSSID;
}
LLVMContext::~LLVMContext() { delete pImpl; }
@@ -255,6 +267,14 @@ uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const {
return pImpl->getOperandBundleTagID(Tag);
}
+SyncScope::ID LLVMContext::getOrInsertSyncScopeID(StringRef SSN) {
+ return pImpl->getOrInsertSyncScopeID(SSN);
+}
+
+void LLVMContext::getSyncScopeNames(SmallVectorImpl<StringRef> &SSNs) const {
+ pImpl->getSyncScopeNames(SSNs);
+}
+
void LLVMContext::setGC(const Function &Fn, std::string GCName) {
auto It = pImpl->GCNames.find(&Fn);
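
The point of the new hook is that targets and frontends can intern additional scopes beyond the two pre-registered ones; repeated lookups of the same name return the same ID. A sketch — the "agent" scope name is a hypothetical example, not something this patch registers:

    #include "llvm/IR/LLVMContext.h"

    // Sketch: intern a target-specific synchronization scope. Calling this
    // twice on the same context yields the same SyncScope::ID.
    static llvm::SyncScope::ID getAgentScope(llvm::LLVMContext &Ctx) {
      return Ctx.getOrInsertSyncScopeID("agent"); // hypothetical scope name
    }
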
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index c19e1be44fdc7..57dd08b36fe70 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -205,6 +205,20 @@ uint32_t LLVMContextImpl::getOperandBundleTagID(StringRef Tag) const {
return I->second;
}
+SyncScope::ID LLVMContextImpl::getOrInsertSyncScopeID(StringRef SSN) {
+ auto NewSSID = SSC.size();
+ assert(NewSSID < std::numeric_limits<SyncScope::ID>::max() &&
+ "Hit the maximum number of synchronization scopes allowed!");
+ return SSC.insert(std::make_pair(SSN, SyncScope::ID(NewSSID))).first->second;
+}
+
+void LLVMContextImpl::getSyncScopeNames(
+ SmallVectorImpl<StringRef> &SSNs) const {
+ SSNs.resize(SSC.size());
+ for (const auto &SSE : SSC)
+ SSNs[SSE.second] = SSE.first();
+}
+
/// Singleton instance of the OptBisect class.
///
/// This singleton is accessed via the LLVMContext::getOptBisect() function. It
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 395beb57fe373..e413a4f344329 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -1297,6 +1297,20 @@ public:
void getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const;
uint32_t getOperandBundleTagID(StringRef Tag) const;
+ /// A set of interned synchronization scopes. The StringMap maps
+ /// synchronization scope names to their respective synchronization scope IDs.
+ StringMap<SyncScope::ID> SSC;
+
+ /// getOrInsertSyncScopeID - Maps synchronization scope name to
+ /// synchronization scope ID. Every synchronization scope registered with
+ /// LLVMContext has a unique ID, except for the pre-defined ones.
+ SyncScope::ID getOrInsertSyncScopeID(StringRef SSN);
+
+ /// getSyncScopeNames - Populates the client-supplied SmallVector with
+ /// synchronization scope names registered with LLVMContext. Synchronization
+ /// scope names are ordered by increasing synchronization scope IDs.
+ void getSyncScopeNames(SmallVectorImpl<StringRef> &SSNs) const;
+
/// Maintain the GC name for each function.
///
/// This saves allocating an additional word in Function for programs which
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index f8853ed169c5d..fdc7de6eaa34e 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -88,7 +88,7 @@ Module::~Module() {
delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
}
-RandomNumberGenerator *Module::createRNG(const Pass* P) const {
+std::unique_ptr<RandomNumberGenerator> Module::createRNG(const Pass* P) const {
SmallString<32> Salt(P->getPassName());
// This RNG is guaranteed to produce the same random stream only
@@ -103,7 +103,7 @@ RandomNumberGenerator *Module::createRNG(const Pass* P) const {
// store salt metadata from the Module constructor.
Salt += sys::path::filename(getModuleIdentifier());
- return new RandomNumberGenerator(Salt);
+ return std::unique_ptr<RandomNumberGenerator>{new RandomNumberGenerator(Salt)};
}
/// getNamedValue - Return the first global value in the module with
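
Call sites that previously owned (and had to delete) the raw RandomNumberGenerator now just hold the unique_ptr. A sketch of the updated usage:

    #include "llvm/IR/Module.h"
    #include "llvm/Support/RandomNumberGenerator.h"
    #include <memory>

    // Sketch: the RNG's lifetime is now tied to the unique_ptr, so there is
    // no manual delete at the call site.
    static void useRNG(const llvm::Module &M, const llvm::Pass *P) {
      std::unique_ptr<llvm::RandomNumberGenerator> RNG = M.createRNG(P);
      // ... draw numbers from *RNG; it is freed automatically on scope exit.
    }
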
diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp
new file mode 100644
index 0000000000000..8b328c221da32
--- /dev/null
+++ b/lib/IR/SafepointIRVerifier.cpp
@@ -0,0 +1,437 @@
+//===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Run a sanity check on the IR to ensure that Safepoints - if they've been
+// inserted - were inserted correctly. In particular, look for use of
+// non-relocated values after a safepoint. Its primary use is to check the
+// correctness of safepoint insertion immediately after insertion, but it can
+// also be used to verify that later transforms have not found a way to break
+// safepoint semantics.
+//
+// In its current form, this verifier checks a property which is sufficient,
+// but not necessary for correctness. There are some cases where an unrelocated
+// pointer can be used after the safepoint. Consider this example:
+//
+// a = ...
+// b = ...
+// (a',b') = safepoint(a,b)
+// c = cmp eq a b
+// br c, ..., ....
+//
+// Because it is valid to reorder 'c' above the safepoint, this is legal. In
+// practice, this is a somewhat uncommon transform, but CodeGenPrepare does
+// create idioms like this.
+// idioms like this. The verifier knows about these cases and avoids reporting
+// false positives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/SafepointIRVerifier.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "safepoint-ir-verifier"
+
+using namespace llvm;
+
+/// This option is used for writing test cases. Instead of crashing the program
+/// when verification fails, report a message to the console (for FileCheck
+/// usage) and continue execution as if nothing happened.
+static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only",
+ cl::init(false));
+
+static void Verify(const Function &F, const DominatorTree &DT);
+
+struct SafepointIRVerifier : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DominatorTree DT;
+ SafepointIRVerifier() : FunctionPass(ID) {
+ initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ DT.recalculate(F);
+ Verify(F, DT);
+ return false; // no modifications
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ StringRef getPassName() const override { return "safepoint verifier"; }
+};
+
+void llvm::verifySafepointIR(Function &F) {
+ SafepointIRVerifier pass;
+ pass.runOnFunction(F);
+}
+
+char SafepointIRVerifier::ID = 0;
+
+FunctionPass *llvm::createSafepointIRVerifierPass() {
+ return new SafepointIRVerifier();
+}
+
+INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir",
+ "Safepoint IR Verifier", false, true)
+INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir",
+ "Safepoint IR Verifier", false, true)
+
+static bool isGCPointerType(Type *T) {
+ if (auto *PT = dyn_cast<PointerType>(T))
+ // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
+ // GC managed heap. We know that a pointer into this heap needs to be
+ // updated and that no other pointer does.
+ return (1 == PT->getAddressSpace());
+ return false;
+}
+
+static bool containsGCPtrType(Type *Ty) {
+ if (isGCPointerType(Ty))
+ return true;
+ if (VectorType *VT = dyn_cast<VectorType>(Ty))
+ return isGCPointerType(VT->getScalarType());
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
+ return containsGCPtrType(AT->getElementType());
+ if (StructType *ST = dyn_cast<StructType>(Ty))
+ return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
+ containsGCPtrType);
+ return false;
+}
+
+// Debugging aid -- prints a [Begin, End) range of values.
+template<typename IteratorTy>
+static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) {
+ OS << "[ ";
+ while (Begin != End) {
+ OS << **Begin << " ";
+ ++Begin;
+ }
+ OS << "]";
+}
+
+/// The verifier algorithm is phrased in terms of availability. The set of
+/// values "available" at a given point in the control flow graph is the set of
+/// correctly relocated values at that point, and is a subset of the set of
+/// definitions dominating that point.
+
+/// State we compute and track per basic block.
+struct BasicBlockState {
+ // Set of values available coming in, before the phi nodes
+ DenseSet<const Value *> AvailableIn;
+
+ // Set of values available going out
+ DenseSet<const Value *> AvailableOut;
+
+ // AvailableOut minus AvailableIn.
+ // All elements are Instructions
+ DenseSet<const Value *> Contribution;
+
+ // True if this block contains a safepoint and thus AvailableIn does not
+ // contribute to AvailableOut.
+ bool Cleared = false;
+};
+
+
+/// Gather all the definitions dominating the start of BB into Result. This is
+/// simply the Defs introduced by every dominating basic block and the function
+/// arguments.
+static void GatherDominatingDefs(const BasicBlock *BB,
+ DenseSet<const Value *> &Result,
+ const DominatorTree &DT,
+ DenseMap<const BasicBlock *, BasicBlockState *> &BlockMap) {
+ DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)];
+
+ while (DTN->getIDom()) {
+ DTN = DTN->getIDom();
+ const auto &Defs = BlockMap[DTN->getBlock()]->Contribution;
+ Result.insert(Defs.begin(), Defs.end());
+ // If this block is 'Cleared', then nothing LiveIn to this block can be
+ // available after this block completes. Note: This turns out to be
+ // really important for reducing memory consumption of the initial available
+ // sets and thus peak memory usage by this verifier.
+ if (BlockMap[DTN->getBlock()]->Cleared)
+ return;
+ }
+
+ for (const Argument &A : BB->getParent()->args())
+ if (containsGCPtrType(A.getType()))
+ Result.insert(&A);
+}
+
+/// Model the effect of an instruction on the set of available values.
+static void TransferInstruction(const Instruction &I, bool &Cleared,
+ DenseSet<const Value *> &Available) {
+ if (isStatepoint(I)) {
+ Cleared = true;
+ Available.clear();
+ } else if (containsGCPtrType(I.getType()))
+ Available.insert(&I);
+}
+
+/// Compute the AvailableOut set for BB from its BasicBlockState BBS. FirstPass
+/// is set when the verifier runs for the first time, computing the AvailableOut
+/// set for BB.
+static void TransferBlock(const BasicBlock *BB,
+ BasicBlockState &BBS, bool FirstPass) {
+
+ const DenseSet<const Value *> &AvailableIn = BBS.AvailableIn;
+ DenseSet<const Value *> &AvailableOut = BBS.AvailableOut;
+
+ if (BBS.Cleared) {
+ // AvailableOut does not change no matter how the input changes, just
+ // leave it be. We need to force this calculation the first time so that
+ // we have an AvailableOut at all.
+ if (FirstPass) {
+ AvailableOut = BBS.Contribution;
+ }
+ } else {
+ // Otherwise, recompute AvailableOut from scratch as Contribution plus the
+ // current AvailableIn; anything no longer in AvailableIn drops out here.
+ DenseSet<const Value *> Temp = BBS.Contribution;
+ set_union(Temp, AvailableIn);
+ AvailableOut = std::move(Temp);
+ }
+
+ DEBUG(dbgs() << "Transfered block " << BB->getName() << " from ";
+ PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end());
+ dbgs() << " to ";
+ PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end());
+ dbgs() << "\n";);
+}
+
+/// A given derived pointer can have multiple base pointers through phi/selects.
+/// This type indicates when the base pointer is exclusively constant
+/// (ExclusivelySomeConstant), and if that constant is proven to be exclusively
+/// null, we record that as ExclusivelyNull. In all other cases, the BaseType is
+/// NonConstant.
+enum BaseType {
+ NonConstant = 1, // Base pointer is not exclusively constant.
+ ExclusivelyNull,
+ ExclusivelySomeConstant // Base pointers for a given derived pointer are from a
+ // set of constants, but they are not exclusively
+ // null.
+};
+
+/// Return the baseType for Val which states whether Val is exclusively
+/// derived from constant/null, or not exclusively derived from constant.
+/// Val is exclusively derived from a constant base when all operands of phis
+/// and selects are derived from a constant base.
+static enum BaseType getBaseType(const Value *Val) {
+
+ SmallVector<const Value *, 32> Worklist;
+ DenseSet<const Value *> Visited;
+ bool isExclusivelyDerivedFromNull = true;
+ Worklist.push_back(Val);
+ // Strip through all the bitcasts and GEPs to get the base pointer. Also
+ // track whether the base remains exclusively constant when there can be
+ // multiple base pointers (through phis or selects).
+ while (!Worklist.empty()) {
+ const Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ if (const auto *CI = dyn_cast<CastInst>(V)) {
+ Worklist.push_back(CI->stripPointerCasts());
+ continue;
+ }
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ Worklist.push_back(GEP->getPointerOperand());
+ continue;
+ }
+ // Push all the incoming values of phi node into the worklist for
+ // processing.
+ if (const auto *PN = dyn_cast<PHINode>(V)) {
+ for (Value *InV : PN->incoming_values())
+ Worklist.push_back(InV);
+ continue;
+ }
+ if (const auto *SI = dyn_cast<SelectInst>(V)) {
+ // Push in the true and false values
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+ if (isa<Constant>(V)) {
+ // We found at least one base pointer which is non-null, so this derived
+ // pointer is not exclusively derived from null.
+ if (V != Constant::getNullValue(V->getType()))
+ isExclusivelyDerivedFromNull = false;
+ // Continue processing the remaining values to make sure it's exclusively
+ // constant.
+ continue;
+ }
+ // At this point, we know that the base pointer is not exclusively
+ // constant.
+ return BaseType::NonConstant;
+ }
+ // Now, we know that the base pointer is exclusively constant, but we need to
+ // differentiate between exclusive null constant and non-null constant.
+ return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull
+ : BaseType::ExclusivelySomeConstant;
+}
+
+static void Verify(const Function &F, const DominatorTree &DT) {
+ SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
+ DenseMap<const BasicBlock *, BasicBlockState *> BlockMap;
+
+ DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n");
+ if (PrintOnly)
+ dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n";
+
+
+ for (const BasicBlock &BB : F) {
+ BasicBlockState *BBS = new (BSAllocator.Allocate()) BasicBlockState;
+ for (const auto &I : BB)
+ TransferInstruction(I, BBS->Cleared, BBS->Contribution);
+ BlockMap[&BB] = BBS;
+ }
+
+ for (auto &BBI : BlockMap) {
+ GatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT, BlockMap);
+ TransferBlock(BBI.first, *BBI.second, true);
+ }
+
+ SetVector<const BasicBlock *> Worklist;
+ for (auto &BBI : BlockMap)
+ Worklist.insert(BBI.first);
+
+ // This loop iterates the AvailableIn and AvailableOut sets to a fixed point.
+ // The AvailableIn and AvailableOut sets decrease as we iterate.
+ while (!Worklist.empty()) {
+ const BasicBlock *BB = Worklist.pop_back_val();
+ BasicBlockState *BBS = BlockMap[BB];
+
+ size_t OldInCount = BBS->AvailableIn.size();
+ for (const BasicBlock *PBB : predecessors(BB))
+ set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut);
+
+ if (OldInCount == BBS->AvailableIn.size())
+ continue;
+
+ assert(OldInCount > BBS->AvailableIn.size() && "invariant!");
+
+ size_t OldOutCount = BBS->AvailableOut.size();
+ TransferBlock(BB, *BBS, false);
+ if (OldOutCount != BBS->AvailableOut.size()) {
+ assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
+ Worklist.insert(succ_begin(BB), succ_end(BB));
+ }
+ }
+
+ // We now have all the information we need to decide if the use of a heap
+ // reference is legal or not, given our safepoint semantics.
+
+ bool AnyInvalidUses = false;
+
+ auto ReportInvalidUse = [&AnyInvalidUses](const Value &V,
+ const Instruction &I) {
+ errs() << "Illegal use of unrelocated value found!\n";
+ errs() << "Def: " << V << "\n";
+ errs() << "Use: " << I << "\n";
+ if (!PrintOnly)
+ abort();
+ AnyInvalidUses = true;
+ };
+
+ auto isNotExclusivelyConstantDerived = [](const Value *V) {
+ return getBaseType(V) == BaseType::NonConstant;
+ };
+
+ for (const BasicBlock &BB : F) {
+ // We destructively modify AvailableIn as we traverse the block instruction
+ // by instruction.
+ DenseSet<const Value *> &AvailableSet = BlockMap[&BB]->AvailableIn;
+ for (const Instruction &I : BB) {
+ if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
+ if (containsGCPtrType(PN->getType()))
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ const BasicBlock *InBB = PN->getIncomingBlock(i);
+ const Value *InValue = PN->getIncomingValue(i);
+
+ if (isNotExclusivelyConstantDerived(InValue) &&
+ !BlockMap[InBB]->AvailableOut.count(InValue))
+ ReportInvalidUse(*InValue, *PN);
+ }
+ } else if (isa<CmpInst>(I) &&
+ containsGCPtrType(I.getOperand(0)->getType())) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ enum BaseType baseTyLHS = getBaseType(LHS),
+ baseTyRHS = getBaseType(RHS);
+
+ // Returns true if LHS and RHS are unrelocated pointers and they are
+ // valid unrelocated uses.
+ auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS,
+ &RHS]() {
+ // A cmp instruction has valid unrelocated pointer operands only if
+ // both operands are unrelocated pointers.
+ // In the comparison between two pointers, if one is an unrelocated
+ // use, the other *should be* an unrelocated use, for this
+ // instruction to contain valid unrelocated uses. This unrelocated
+ // use can be a null constant as well, or another unrelocated
+ // pointer.
+ if (AvailableSet.count(LHS) || AvailableSet.count(RHS))
+ return false;
+ // Constant pointers (that are not exclusively null) may have
+ // meaning in different VMs, so we cannot reorder the compare
+ // against constant pointers before the safepoint. In other words,
+ // comparison of an unrelocated use against a non-null constant
+ // may be invalid.
+ if ((baseTyLHS == BaseType::ExclusivelySomeConstant &&
+ baseTyRHS == BaseType::NonConstant) ||
+ (baseTyLHS == BaseType::NonConstant &&
+ baseTyRHS == BaseType::ExclusivelySomeConstant))
+ return false;
+ // All remaining cases are valid and are enumerated below:
+ // 1. Comparison between an exclusively derived null pointer and a
+ // constant base pointer.
+ // 2. Comparison between an exclusively derived null pointer and a
+ // non-constant unrelocated base pointer.
+ // 3. Comparison between two unrelocated pointers.
+ return true;
+ };
+ if (!hasValidUnrelocatedUse()) {
+ // Print out all non-constant derived pointers that are unrelocated
+ // uses, which are invalid.
+ if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS))
+ ReportInvalidUse(*LHS, I);
+ if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS))
+ ReportInvalidUse(*RHS, I);
+ }
+ } else {
+ for (const Value *V : I.operands())
+ if (containsGCPtrType(V->getType()) &&
+ isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V))
+ ReportInvalidUse(*V, I);
+ }
+
+ bool Cleared = false;
+ TransferInstruction(I, Cleared, AvailableSet);
+ (void)Cleared;
+ }
+ }
+
+ if (PrintOnly && !AnyInvalidUses) {
+ dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName()
+ << "\n";
+ }
+}
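
The new file exposes two entry points: verifySafepointIR() for ad hoc checks and createSafepointIRVerifierPass() for pass pipelines. A usage sketch — note that on an invalid use the verifier aborts unless -safepoint-ir-verifier-print-only is set:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/SafepointIRVerifier.h"

    // Sketch: run the safepoint verifier directly on one function, or
    // schedule it over a whole module via the legacy pass manager.
    static void checkSafepoints(llvm::Function &F, llvm::Module &M) {
      llvm::verifySafepointIR(F);
      llvm::legacy::PassManager PM;
      PM.add(llvm::createSafepointIRVerifierPass());
      PM.run(M);
    }
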
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index 44fe5e48c720c..20e9c2b5fff25 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -538,7 +538,7 @@ bool CompositeType::indexValid(const Value *V) const {
if (auto *STy = dyn_cast<StructType>(this)) {
// Structure indexes require (vectors of) 32-bit integer constants. In the
// vector case all of the indices must be equal.
- if (!V->getType()->getScalarType()->isIntegerTy(32))
+ if (!V->getType()->isIntOrIntVectorTy(32))
return false;
const Constant *C = dyn_cast<Constant>(V);
if (C && V->getType()->isVectorTy())
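
This is the same helper-collapsing pattern the Verifier.cpp hunks below use: getScalarType()->isIntegerTy(N) becomes isIntOrIntVectorTy(N), which handles the scalar and vector cases in one call. A sketch of the equivalence:

    #include "llvm/IR/Type.h"

    // Sketch: accepts both i32 and <N x i32>; the helper simply folds in the
    // getScalarType() step.
    static bool isI32OrI32Vector(const llvm::Type *T) {
      // Equivalent to T->getScalarType()->isIntegerTy(32).
      return T->isIntOrIntVectorTy(32);
    }
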
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 819f63520c744..454a56a769230 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -2504,15 +2504,13 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert(SrcTy->getScalarType()->isPointerTy(),
- "PtrToInt source must be pointer", &I);
+ Assert(SrcTy->isPtrOrPtrVectorTy(), "PtrToInt source must be pointer", &I);
if (auto *PTy = dyn_cast<PointerType>(SrcTy->getScalarType()))
Assert(!DL.isNonIntegralPointerType(PTy),
"ptrtoint not supported for non-integral pointers");
- Assert(DestTy->getScalarType()->isIntegerTy(),
- "PtrToInt result must be integral", &I);
+ Assert(DestTy->isIntOrIntVectorTy(), "PtrToInt result must be integral", &I);
Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch",
&I);
@@ -2531,10 +2529,9 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert(SrcTy->getScalarType()->isIntegerTy(),
+ Assert(SrcTy->isIntOrIntVectorTy(),
"IntToPtr source must be an integral", &I);
- Assert(DestTy->getScalarType()->isPointerTy(),
- "IntToPtr result must be a pointer", &I);
+ Assert(DestTy->isPtrOrPtrVectorTy(), "IntToPtr result must be a pointer", &I);
if (auto *PTy = dyn_cast<PointerType>(DestTy->getScalarType()))
Assert(!DL.isNonIntegralPointerType(PTy),
@@ -2952,11 +2949,10 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
Assert(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
+ Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPtrOrPtrVectorTy(),
"Invalid operand types for ICmp instruction", &IC);
// Check that the predicate is valid.
- Assert(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
- IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+ Assert(IC.isIntPredicate(),
"Invalid predicate in ICmp instruction!", &IC);
visitInstruction(IC);
@@ -2972,8 +2968,7 @@ void Verifier::visitFCmpInst(FCmpInst &FC) {
Assert(Op0Ty->isFPOrFPVectorTy(),
"Invalid operand types for FCmp instruction", &FC);
// Check that the predicate is valid.
- Assert(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
- FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+ Assert(FC.isFPPredicate(),
"Invalid predicate in FCmp instruction!", &FC);
visitInstruction(FC);
@@ -3011,7 +3006,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert(GEP.getType()->getScalarType()->isPointerTy() &&
+ Assert(GEP.getType()->isPtrOrPtrVectorTy() &&
GEP.getResultElementType() == ElTy,
"GEP is not of right type for indices!", &GEP, ElTy);
@@ -3027,7 +3022,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
unsigned IndexWidth = IndexTy->getVectorNumElements();
Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP);
}
- Assert(IndexTy->getScalarType()->isIntegerTy(),
+ Assert(IndexTy->isIntOrIntVectorTy(),
"All GEP indices should be of integer type");
}
}
@@ -3113,7 +3108,7 @@ void Verifier::visitLoadInst(LoadInst &LI) {
ElTy, &LI);
checkAtomicMemAccessSize(ElTy, &LI);
} else {
- Assert(LI.getSynchScope() == CrossThread,
+ Assert(LI.getSyncScopeID() == SyncScope::System,
"Non-atomic load cannot have SynchronizationScope specified", &LI);
}
@@ -3142,7 +3137,7 @@ void Verifier::visitStoreInst(StoreInst &SI) {
ElTy, &SI);
checkAtomicMemAccessSize(ElTy, &SI);
} else {
- Assert(SI.getSynchScope() == CrossThread,
+ Assert(SI.getSyncScopeID() == SyncScope::System,
"Non-atomic store cannot have SynchronizationScope specified", &SI);
}
visitInstruction(SI);
@@ -4049,6 +4044,73 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"incorrect alignment of the source argument", CS);
break;
}
+ case Intrinsic::memmove_element_unordered_atomic: {
+ auto *MI = cast<ElementUnorderedAtomicMemMoveInst>(CS.getInstruction());
+
+ ConstantInt *ElementSizeCI =
+ dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
+ Assert(ElementSizeCI,
+ "element size of the element-wise unordered atomic memory "
+ "intrinsic must be a constant int",
+ CS);
+ const APInt &ElementSizeVal = ElementSizeCI->getValue();
+ Assert(ElementSizeVal.isPowerOf2(),
+ "element size of the element-wise atomic memory intrinsic "
+ "must be a power of 2",
+ CS);
+
+ if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) {
+ uint64_t Length = LengthCI->getZExtValue();
+ uint64_t ElementSize = MI->getElementSizeInBytes();
+ Assert((Length % ElementSize) == 0,
+ "constant length must be a multiple of the element size in the "
+ "element-wise atomic memory intrinsic",
+ CS);
+ }
+
+ auto IsValidAlignment = [&](uint64_t Alignment) {
+ return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
+ };
+ uint64_t DstAlignment = CS.getParamAlignment(0),
+ SrcAlignment = CS.getParamAlignment(1);
+ Assert(IsValidAlignment(DstAlignment),
+ "incorrect alignment of the destination argument", CS);
+ Assert(IsValidAlignment(SrcAlignment),
+ "incorrect alignment of the source argument", CS);
+ break;
+ }
+ case Intrinsic::memset_element_unordered_atomic: {
+ auto *MI = cast<ElementUnorderedAtomicMemSetInst>(CS.getInstruction());
+
+ ConstantInt *ElementSizeCI =
+ dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
+ Assert(ElementSizeCI,
+ "element size of the element-wise unordered atomic memory "
+ "intrinsic must be a constant int",
+ CS);
+ const APInt &ElementSizeVal = ElementSizeCI->getValue();
+ Assert(ElementSizeVal.isPowerOf2(),
+ "element size of the element-wise atomic memory intrinsic "
+ "must be a power of 2",
+ CS);
+
+ if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) {
+ uint64_t Length = LengthCI->getZExtValue();
+ uint64_t ElementSize = MI->getElementSizeInBytes();
+ Assert((Length % ElementSize) == 0,
+ "constant length must be a multiple of the element size in the "
+ "element-wise atomic memory intrinsic",
+ CS);
+ }
+
+ auto IsValidAlignment = [&](uint64_t Alignment) {
+ return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
+ };
+ uint64_t DstAlignment = CS.getParamAlignment(0);
+ Assert(IsValidAlignment(DstAlignment),
+ "incorrect alignment of the destination argument", CS);
+ break;
+ }
case Intrinsic::gcroot:
case Intrinsic::gcwrite:
case Intrinsic::gcread:
@@ -4253,7 +4315,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// relocated pointer. It can be casted to the correct type later if it's
// desired. However, they must have the same address space and 'vectorness'
GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction());
- Assert(Relocate.getDerivedPtr()->getType()->getScalarType()->isPointerTy(),
+ Assert(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(),
"gc.relocate: relocated value must be a gc pointer", CS);
auto ResultType = CS.getType();
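
Both new intrinsic cases apply the same two-part alignment rule: each pointer argument's alignment must be a power of two and at least the element size. A standalone sketch of that predicate:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // Sketch: the alignment check the verifier performs for the element-wise
    // unordered-atomic memmove/memset intrinsics above.
    static bool isValidElementAlignment(uint64_t Alignment,
                                        uint64_t ElementSize) {
      return llvm::isPowerOf2_64(Alignment) && ElementSize <= Alignment;
    }
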
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 68b8c9fcb939f..19973946ac5a6 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -665,6 +665,15 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier(
Sym.getIRName(), GlobalValue::ExternalLinkage, ""));
ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
+
+ // For linker-redefined symbols (via --wrap or --defsym) we want to
+ // switch the linkage to `weak` to prevent IPOs from happening.
+ // Find the summary in the module for this very GV and record the new
+ // linkage so that we can switch it when we import the GV.
+ if (Res.LinkerRedefined)
+ if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(
+ GUID, BM.getModuleIdentifier()))
+ S->setLinkage(GlobalValue::WeakAnyLinkage);
}
}
}
@@ -1021,7 +1030,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
// Collect for each module the list of function it defines (GUID ->
// Summary).
- StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
+ StringMap<GVSummaryMapTy>
ModuleToDefinedGVSummaries(ThinLTO.ModuleMap.size());
ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule(
ModuleToDefinedGVSummaries);
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index defad1904989d..f486e525b5e76 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -1256,6 +1256,18 @@ Error IRLinker::linkModuleFlagsMetadata() {
return Error::success();
}
+/// Return InlineAsm adjusted with target-specific directives if required.
+/// For ARM and Thumb, we have to add directives to select the appropriate ISA
+/// to support mixing module-level inline assembly from ARM and Thumb modules.
+static std::string adjustInlineAsm(const std::string &InlineAsm,
+ const Triple &Triple) {
+ if (Triple.getArch() == Triple::thumb || Triple.getArch() == Triple::thumbeb)
+ return ".text\n.balign 2\n.thumb\n" + InlineAsm;
+ if (Triple.getArch() == Triple::arm || Triple.getArch() == Triple::armeb)
+ return ".text\n.balign 4\n.arm\n" + InlineAsm;
+ return InlineAsm;
+}
+
Error IRLinker::run() {
// Ensure metadata materialized before value mapping.
if (SrcM->getMaterializer())
@@ -1293,11 +1305,13 @@ Error IRLinker::run() {
// Append the module inline asm string.
if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) {
+ std::string SrcModuleInlineAsm = adjustInlineAsm(SrcM->getModuleInlineAsm(),
+ SrcTriple);
if (DstM.getModuleInlineAsm().empty())
- DstM.setModuleInlineAsm(SrcM->getModuleInlineAsm());
+ DstM.setModuleInlineAsm(SrcModuleInlineAsm);
else
DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" +
- SrcM->getModuleInlineAsm());
+ SrcModuleInlineAsm);
}
// Loop over all of the linked values to compute type mappings.
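
The effect is easiest to see on a concrete triple: module-level asm from a Thumb translation unit gets an ISA-selecting prologue prepended before it is concatenated into the destination module. A hedged sketch, where the triple and asm strings are example values:

    #include "llvm/ADT/Triple.h"
    #include <string>

    // Sketch: what the static adjustInlineAsm() above produces for a Thumb
    // module; ARM modules get ".balign 4\n.arm" instead.
    static std::string thumbExample() {
      llvm::Triple T("thumbv7-linux-gnueabi"); // example triple
      std::string Src = ".globl foo\nfoo:";    // example module-level asm
      if (T.getArch() == llvm::Triple::thumb)
        return ".text\n.balign 2\n.thumb\n" + Src;
      return Src;
    }
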
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 30f357826805a..c8dd630119439 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
@@ -26,6 +27,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
@@ -204,8 +206,7 @@ public:
void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, bool &IsPCRel,
- uint64_t &FixedValue) override;
+ MCValue Target, uint64_t &FixedValue) override;
// Map from a signature symbol to the group section index
using RevGroupMapTy = DenseMap<const MCSymbol *, unsigned>;
@@ -626,7 +627,10 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
- bool &IsPCRel, uint64_t &FixedValue) {
+ uint64_t &FixedValue) {
+ MCAsmBackend &Backend = Asm.getBackend();
+ bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
+ MCFixupKindInfo::FKF_IsPCRel;
const MCSectionELF &FixupSection = cast<MCSectionELF>(*Fragment->getParent());
uint64_t C = Target.getConstant();
uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 0318d916aa49f..eaf6f19326eb4 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -653,16 +653,14 @@ MCAssembler::handleFixup(const MCAsmLayout &Layout, MCFragment &F,
// Evaluate the fixup.
MCValue Target;
uint64_t FixedValue;
- bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
- MCFixupKindInfo::FKF_IsPCRel;
- if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+ bool IsResolved = evaluateFixup(Layout, Fixup, &F, Target, FixedValue);
+ if (!IsResolved) {
// The fixup was unresolved, we need a relocation. Inform the object
// writer of the relocation, and give it an opportunity to adjust the
// fixup value if need be.
- getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel,
- FixedValue);
+ getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, FixedValue);
}
- return std::make_tuple(Target, FixedValue, IsPCRel);
+ return std::make_tuple(Target, FixedValue, IsResolved);
}
void MCAssembler::layout(MCAsmLayout &Layout) {
@@ -738,12 +736,12 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
llvm_unreachable("Unknown fragment with fixups!");
for (const MCFixup &Fixup : Fixups) {
uint64_t FixedValue;
- bool IsPCRel;
+ bool IsResolved;
MCValue Target;
- std::tie(Target, FixedValue, IsPCRel) =
+ std::tie(Target, FixedValue, IsResolved) =
handleFixup(Layout, Frag, Fixup);
getBackend().applyFixup(*this, Fixup, Target, Contents, FixedValue,
- IsPCRel);
+ IsResolved);
}
}
}
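
With the bool parameter gone from recordRelocation(), each object writer recovers the PC-relative flag for itself from the backend's fixup-kind table, and handleFixup()'s tuple now carries resolution status instead. A sketch of the derivation the ELF and Wasm writers use:

    #include "llvm/MC/MCAsmBackend.h"
    #include "llvm/MC/MCFixup.h"
    #include "llvm/MC/MCFixupKindInfo.h"

    // Sketch: the per-writer replacement for the old IsPCRel out-parameter.
    static bool isPCRelFixup(llvm::MCAsmBackend &Backend,
                             const llvm::MCFixup &Fixup) {
      return Backend.getFixupKindInfo(Fixup.getKind()).Flags &
             llvm::MCFixupKindInfo::FKF_IsPCRel;
    }
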
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index c4e7cdbe095e7..62bf0a58fdfa9 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -449,7 +449,7 @@ void MachObjectWriter::recordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
- bool &IsPCRel, uint64_t &FixedValue) {
+ uint64_t &FixedValue) {
TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
Target, FixedValue);
}
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 82352cb50c70c..0d31f65c49d9f 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -36,8 +36,7 @@
using namespace llvm;
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "reloc-info"
+#define DEBUG_TYPE "mc"
namespace {
@@ -153,7 +152,7 @@ struct WasmRelocationEntry {
}
void print(raw_ostream &Out) const {
- Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend
+ Out << "Off=" << Offset << ", Sym=" << *Symbol << ", Addend=" << Addend
<< ", Type=" << Type << ", FixupSection=" << FixupSection;
}
@@ -199,6 +198,7 @@ class WasmObjectWriter : public MCObjectWriter {
DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo>
FunctionTypeIndices;
+ SmallVector<WasmFunctionType, 4> FunctionTypes;
// TargetObjectWriter wrappers.
bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
@@ -224,6 +224,7 @@ private:
SymbolIndices.clear();
IndirectSymbolIndices.clear();
FunctionTypeIndices.clear();
+ FunctionTypes.clear();
MCObjectWriter::reset();
}
@@ -231,8 +232,7 @@ private:
void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, bool &IsPCRel,
- uint64_t &FixedValue) override;
+ MCValue Target, uint64_t &FixedValue) override;
void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
@@ -276,6 +276,8 @@ private:
void writeRelocations(ArrayRef<WasmRelocationEntry> Relocations,
uint64_t HeaderSize);
uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry);
+ uint32_t getFunctionType(const MCSymbolWasm& Symbol);
+ uint32_t registerFunctionType(const MCSymbolWasm& Symbol);
};
} // end anonymous namespace
@@ -350,7 +352,10 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
- bool &IsPCRel, uint64_t &FixedValue) {
+ uint64_t &FixedValue) {
+ MCAsmBackend &Backend = Asm.getBackend();
+ bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
+ MCFixupKindInfo::FKF_IsPCRel;
const auto &FixupSection = cast<MCSectionWasm>(*Fragment->getParent());
uint64_t C = Target.getConstant();
uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
@@ -401,15 +406,11 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
const MCSymbolRefExpr *RefA = Target.getSymA();
const auto *SymA = RefA ? cast<MCSymbolWasm>(&RefA->getSymbol()) : nullptr;
- bool ViaWeakRef = false;
if (SymA && SymA->isVariable()) {
const MCExpr *Expr = SymA->getVariableValue();
- if (const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr)) {
- if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) {
- SymA = cast<MCSymbolWasm>(&Inner->getSymbol());
- ViaWeakRef = true;
- }
- }
+ const auto *Inner = cast<MCSymbolRefExpr>(Expr);
+ if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
+ llvm_unreachable("weakref used in reloc not yet implemented");
}
// Put any constant offset in an addend. Offsets can be negative, and
@@ -417,12 +418,8 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
// be negative and don't wrap.
FixedValue = 0;
- if (SymA) {
- if (ViaWeakRef)
- llvm_unreachable("weakref used in reloc not yet implemented");
- else
- SymA->setUsedInReloc();
- }
+ if (SymA)
+ SymA->setUsedInReloc();
assert(!IsPCRel);
assert(SymA);
@@ -493,7 +490,7 @@ uint32_t WasmObjectWriter::getRelocationIndexValue(
case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
if (!IndirectSymbolIndices.count(RelEntry.Symbol))
- report_fatal_error("symbol not found table index space:" +
+ report_fatal_error("symbol not found table index space: " +
RelEntry.Symbol->getName());
return IndirectSymbolIndices[RelEntry.Symbol];
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
@@ -502,12 +499,12 @@ uint32_t WasmObjectWriter::getRelocationIndexValue(
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
if (!SymbolIndices.count(RelEntry.Symbol))
- report_fatal_error("symbol not found function/global index space:" +
+ report_fatal_error("symbol not found function/global index space: " +
RelEntry.Symbol->getName());
return SymbolIndices[RelEntry.Symbol];
case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
if (!TypeIndices.count(RelEntry.Symbol))
- report_fatal_error("symbol not found in type index space:" +
+ report_fatal_error("symbol not found in type index space: " +
RelEntry.Symbol->getName());
return TypeIndices[RelEntry.Symbol];
default:
@@ -913,6 +910,38 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
endSection(Section);
}
+uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm& Symbol) {
+ assert(Symbol.isFunction());
+ assert(TypeIndices.count(&Symbol));
+ return TypeIndices[&Symbol];
+}
+
+uint32_t WasmObjectWriter::registerFunctionType(const MCSymbolWasm& Symbol) {
+ assert(Symbol.isFunction());
+
+ WasmFunctionType F;
+ if (Symbol.isVariable()) {
+ const MCExpr *Expr = Symbol.getVariableValue();
+ auto *Inner = cast<MCSymbolRefExpr>(Expr);
+ const auto *ResolvedSym = cast<MCSymbolWasm>(&Inner->getSymbol());
+ F.Returns = ResolvedSym->getReturns();
+ F.Params = ResolvedSym->getParams();
+ } else {
+ F.Returns = Symbol.getReturns();
+ F.Params = Symbol.getParams();
+ }
+
+ auto Pair =
+ FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
+ if (Pair.second)
+ FunctionTypes.push_back(F);
+ TypeIndices[&Symbol] = Pair.first->second;
+
+ DEBUG(dbgs() << "registerFunctionType: " << Symbol << " new:" << Pair.second << "\n");
+ DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n");
+ return Pair.first->second;
+}
+
void WasmObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
DEBUG(dbgs() << "WasmObjectWriter::writeObject\n");
@@ -920,7 +949,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32;
// Collect information from the available symbols.
- SmallVector<WasmFunctionType, 4> FunctionTypes;
SmallVector<WasmFunction, 4> Functions;
SmallVector<uint32_t, 4> TableElems;
SmallVector<WasmGlobal, 4> Globals;
@@ -960,37 +988,27 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
// Populate the Imports set.
for (const MCSymbol &S : Asm.symbols()) {
const auto &WS = static_cast<const MCSymbolWasm &>(S);
- int32_t Type;
- if (WS.isFunction()) {
- // Prepare the function's type, if we haven't seen it yet.
- WasmFunctionType F;
- F.Returns = WS.getReturns();
- F.Params = WS.getParams();
- auto Pair =
- FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
- if (Pair.second)
- FunctionTypes.push_back(F);
-
- Type = Pair.first->second;
- } else {
- Type = int32_t(PtrType);
- }
+ if (WS.isTemporary())
+ continue;
+
+ if (WS.isFunction())
+ registerFunctionType(WS);
// If the symbol is not defined in this translation unit, import it.
- if (!WS.isTemporary() && !WS.isDefined(/*SetUsed=*/false)) {
+ if (!WS.isDefined(/*SetUsed=*/false) || WS.isVariable()) {
WasmImport Import;
Import.ModuleName = WS.getModuleName();
Import.FieldName = WS.getName();
if (WS.isFunction()) {
Import.Kind = wasm::WASM_EXTERNAL_FUNCTION;
- Import.Type = Type;
+ Import.Type = getFunctionType(WS);
SymbolIndices[&WS] = NumFuncImports;
++NumFuncImports;
} else {
Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
- Import.Type = Type;
+ Import.Type = int32_t(PtrType);
SymbolIndices[&WS] = NumGlobalImports;
++NumGlobalImports;
}
@@ -1082,10 +1100,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
if (S.isTemporary() && S.getName().empty())
continue;
- // Variable references (weak references) are handled in a second pass
- if (S.isVariable())
- continue;
-
const auto &WS = static_cast<const MCSymbolWasm &>(S);
DEBUG(dbgs() << "MCSymbol: '" << S << "'"
<< " isDefined=" << S.isDefined() << " isExternal="
@@ -1097,20 +1111,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
if (WS.isWeak())
WeakSymbols.push_back(WS.getName());
+ if (WS.isVariable())
+ continue;
+
unsigned Index;
if (WS.isFunction()) {
- // Prepare the function's type, if we haven't seen it yet.
- WasmFunctionType F;
- F.Returns = WS.getReturns();
- F.Params = WS.getParams();
- auto Pair =
- FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
- if (Pair.second)
- FunctionTypes.push_back(F);
-
- int32_t Type = Pair.first->second;
-
if (WS.isDefined(/*SetUsed=*/false)) {
if (WS.getOffset() != 0)
report_fatal_error(
@@ -1125,21 +1131,21 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
// Prepare the function.
WasmFunction Func;
- Func.Type = Type;
+ Func.Type = getFunctionType(WS);
Func.Sym = &WS;
SymbolIndices[&WS] = Index;
Functions.push_back(Func);
} else {
- // Should be no such thing as weak undefined symbol
- assert(!WS.isVariable());
-
// An import; the index was assigned above.
Index = SymbolIndices.find(&WS)->second;
}
+ DEBUG(dbgs() << " -> function index: " << Index << "\n");
+
// If needed, prepare the function to be called indirectly.
- if (IsAddressTaken.count(&WS)) {
+ if (IsAddressTaken.count(&WS) != 0) {
IndirectSymbolIndices[&WS] = TableElems.size();
+ DEBUG(dbgs() << " -> adding to table: " << TableElems.size() << "\n");
TableElems.push_back(Index);
}
} else {
@@ -1185,7 +1191,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
Align->getMaxBytesToEmit());
DataBytes.resize(Size, Value);
} else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) {
- DataBytes.insert(DataBytes.end(), Size, Fill->getValue());
+ DataBytes.insert(DataBytes.end(), Fill->getSize(), Fill->getValue());
} else {
const auto &DataFrag = cast<MCDataFragment>(Frag);
const SmallVectorImpl<char> &Contents = DataFrag.getContents();
@@ -1205,11 +1211,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
Global.InitialValue = DataSection.getSectionOffset();
Global.ImportIndex = 0;
SymbolIndices[&WS] = Index;
+ DEBUG(dbgs() << " -> global index: " << Index << "\n");
Globals.push_back(Global);
}
// If the symbol is visible outside this translation unit, export it.
    if (WS.isExternal() && WS.isDefined(/*SetUsed=*/false)) {
WasmExport Export;
Export.FieldName = WS.getName();
Export.Index = Index;
@@ -1217,26 +1224,28 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
else
Export.Kind = wasm::WASM_EXTERNAL_GLOBAL;
+ DEBUG(dbgs() << " -> export " << Exports.size() << "\n");
Exports.push_back(Export);
}
}
- // Handle weak aliases
+  // Handle weak aliases. We need to process these in a separate pass because
+  // the target of an alias must be processed before the alias itself, and the
+  // symbols are not necessarily ordered that way.
for (const MCSymbol &S : Asm.symbols()) {
if (!S.isVariable())
continue;
- assert(S.isExternal());
assert(S.isDefined(/*SetUsed=*/false));
const auto &WS = static_cast<const MCSymbolWasm &>(S);
-
- // Find the target symbol of this weak alias
+ // Find the target symbol of this weak alias and export that index
const MCExpr *Expr = WS.getVariableValue();
- auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
+ auto *Inner = cast<MCSymbolRefExpr>(Expr);
const auto *ResolvedSym = cast<MCSymbolWasm>(&Inner->getSymbol());
+ DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *ResolvedSym << "'\n");
+ assert(SymbolIndices.count(ResolvedSym) > 0);
uint32_t Index = SymbolIndices.find(ResolvedSym)->second;
- DEBUG(dbgs() << "Weak alias: '" << WS << "' -> '" << ResolvedSym << "' = " << Index << "\n");
- SymbolIndices[&WS] = Index;
+ DEBUG(dbgs() << " -> index:" << Index << "\n");
WasmExport Export;
Export.FieldName = WS.getName();
@@ -1245,7 +1254,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
else
Export.Kind = wasm::WASM_EXTERNAL_GLOBAL;
- WeakSymbols.push_back(Export.FieldName);
+ DEBUG(dbgs() << " -> export " << Exports.size() << "\n");
Exports.push_back(Export);
}
@@ -1254,15 +1263,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB)
continue;
- WasmFunctionType F;
- F.Returns = Fixup.Symbol->getReturns();
- F.Params = Fixup.Symbol->getParams();
- auto Pair =
- FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
- if (Pair.second)
- FunctionTypes.push_back(F);
-
- TypeIndices[Fixup.Symbol] = Pair.first->second;
+ registerFunctionType(*Fixup.Symbol);
}
// Write out the Wasm header.
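The registerFunctionType helper introduced above replaces three hand-rolled copies of the same interning idiom (insert-if-new, then reuse the stable index). A minimal sketch of that idiom, using std::map and a hypothetical Signature stand-in for WasmFunctionType (the real code keys a DenseMap with custom hashing):

  #include <cstdint>
  #include <map>
  #include <tuple>
  #include <vector>

  // Hypothetical stand-in for WasmFunctionType.
  struct Signature {
    std::vector<int> Returns, Params;
    bool operator<(const Signature &O) const {
      return std::tie(Returns, Params) < std::tie(O.Returns, O.Params);
    }
  };

  static std::map<Signature, uint32_t> TypeIdx; // signature -> type index
  static std::vector<Signature> Types;          // type index -> signature

  static uint32_t internType(const Signature &S) {
    auto Ins = TypeIdx.insert({S, uint32_t(Types.size())});
    if (Ins.second)           // first time this signature is seen
      Types.push_back(S);
    return Ins.first->second; // stable index on every later call
  }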
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index fc5234950391b..956ae70b38d19 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -197,8 +197,7 @@ public:
void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, bool &IsPCRel,
- uint64_t &FixedValue) override;
+ MCValue Target, uint64_t &FixedValue) override;
void createFileSymbols(MCAssembler &Asm);
void assignSectionNumbers();
@@ -708,9 +707,11 @@ bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
InSet, IsPCRel);
}
-void WinCOFFObjectWriter::recordRelocation(
- MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) {
+void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
assert(Target.getSymA() && "Relocation must reference a symbol!");
const MCSymbol &A = Target.getSymA()->getSymbol();
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index fff497ba55647..7f80bf0b83a0a 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -567,20 +567,16 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End)
Ex.Name = readString(Ptr);
Ex.Kind = readUint8(Ptr);
Ex.Index = readVaruint32(Ptr);
+ WasmSymbol::SymbolType ExportType;
+ bool MakeSymbol = false;
switch (Ex.Kind) {
case wasm::WASM_EXTERNAL_FUNCTION:
- SymbolMap.try_emplace(Ex.Name, Symbols.size());
- Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT,
- Sections.size(), i);
- DEBUG(dbgs() << "Adding export: " << Symbols.back()
- << " sym index:" << Symbols.size() << "\n");
+ ExportType = WasmSymbol::SymbolType::FUNCTION_EXPORT;
+ MakeSymbol = true;
break;
case wasm::WASM_EXTERNAL_GLOBAL:
- SymbolMap.try_emplace(Ex.Name, Symbols.size());
- Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT,
- Sections.size(), i);
- DEBUG(dbgs() << "Adding export: " << Symbols.back()
- << " sym index:" << Symbols.size() << "\n");
+ ExportType = WasmSymbol::SymbolType::GLOBAL_EXPORT;
+ MakeSymbol = true;
break;
case wasm::WASM_EXTERNAL_MEMORY:
case wasm::WASM_EXTERNAL_TABLE:
@@ -589,6 +585,20 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End)
return make_error<GenericBinaryError>(
"Unexpected export kind", object_error::parse_failed);
}
+ if (MakeSymbol) {
+ auto Pair = SymbolMap.try_emplace(Ex.Name, Symbols.size());
+ if (Pair.second) {
+      Symbols.emplace_back(Ex.Name, ExportType, Sections.size(), i);
+ DEBUG(dbgs() << "Adding export: " << Symbols.back()
+ << " sym index:" << Symbols.size() << "\n");
+ } else {
+ uint32_t SymIndex = Pair.first->second;
+ Symbols[SymIndex] = WasmSymbol(Ex.Name, ExportType, Sections.size(), i);
+ DEBUG(dbgs() << "Replacing existing symbol: " << Symbols[SymIndex]
+ << " sym index:" << SymIndex << "\n");
+ }
+ }
Exports.push_back(Ex);
}
if (Ptr != End)
@@ -665,15 +675,17 @@ Error WasmObjectFile::parseElemSection(const uint8_t *Ptr, const uint8_t *End) {
}
Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) {
+ const uint8_t *Start = Ptr;
uint32_t Count = readVaruint32(Ptr);
DataSegments.reserve(Count);
while (Count--) {
- wasm::WasmDataSegment Segment;
- Segment.Index = readVaruint32(Ptr);
- if (Error Err = readInitExpr(Segment.Offset, Ptr))
+ WasmSegment Segment;
+ Segment.Data.MemoryIndex = readVaruint32(Ptr);
+ if (Error Err = readInitExpr(Segment.Data.Offset, Ptr))
return Err;
uint32_t Size = readVaruint32(Ptr);
- Segment.Content = ArrayRef<uint8_t>(Ptr, Size);
+ Segment.Data.Content = ArrayRef<uint8_t>(Ptr, Size);
+ Segment.SectionOffset = Ptr - Start;
Ptr += Size;
DataSegments.push_back(Segment);
}
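The parsing above leans on readVaruint32, which is outside this hunk. A minimal sketch of the LEB128 decoding it performs, omitting the bounds checks the real helper makes against End:

  #include <cstdint>

  static uint32_t decodeVaruint32(const uint8_t *&Ptr) {
    uint32_t Result = 0;
    unsigned Shift = 0;
    uint8_t Byte;
    do {
      Byte = *Ptr++;
      Result |= uint32_t(Byte & 0x7f) << Shift; // 7 payload bits per byte
      Shift += 7;
    } while (Byte & 0x80);                      // high bit set: more follows
    return Result;
  }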
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index 1371eacdf8f2d..246eee5ddb311 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -609,8 +609,8 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() {
for (auto const &Child : StringChildren) {
auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart +
CurrentOffset);
- Entry->Identifier.NameOffset =
- StringTableOffsets[Child.second->getStringIndex()];
+ Entry->Identifier.setNameOffset(
+ StringTableOffsets[Child.second->getStringIndex()]);
if (Child.second->checkIsDataNode()) {
Entry->Offset.DataEntryOffset = NextLevelOffset;
NextLevelOffset += sizeof(coff_resource_data_entry);
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 2040efdc9d117..6a68cd265ad84 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -345,7 +345,8 @@ void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO,
void MappingTraits<WasmYAML::DataSegment>::mapping(
IO &IO, WasmYAML::DataSegment &Segment) {
- IO.mapRequired("Index", Segment.Index);
+ IO.mapOptional("SectionOffset", Segment.SectionOffset);
+ IO.mapRequired("MemoryIndex", Segment.MemoryIndex);
IO.mapRequired("Offset", Segment.Offset);
IO.mapRequired("Content", Segment.Content);
}
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index acb9e8d015bce..bcd365236e46e 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -225,11 +225,15 @@ OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const {
return {};
}
-std::vector<std::string> OptTable::findByPrefix(StringRef Cur) const {
+std::vector<std::string>
+OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const {
std::vector<std::string> Ret;
for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) {
- if (!In.Prefixes)
+ if (!In.Prefixes || (!In.HelpText && !In.GroupID))
+ continue;
+ if (In.Flags & DisableFlags)
continue;
+
for (int I = 0; In.Prefixes[I]; I++) {
std::string S = std::string(In.Prefixes[I]) + std::string(In.Name);
if (StringRef(S).startswith(Cur))
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 0380bd991d717..9e0cf27aa17b5 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -281,33 +281,52 @@ AnalysisKey NoOpLoopAnalysis::Key;
} // End anonymous namespace.
+void PassBuilder::invokePeepholeEPCallbacks(
+ FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
+ for (auto &C : PeepholeEPCallbacks)
+ C(FPM, Level);
+}
+
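invokePeepholeEPCallbacks drains the callback list at each peephole extension point in the pipelines below. The list is populated through registration hooks added in PassBuilder.h (not shown in this hunk); a sketch of a client, assuming the registerPeepholeEPCallback hook name:

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/InstCombine/InstCombine.h"
  using namespace llvm;

  void registerExtraPeepholes(PassBuilder &PB) {
    PB.registerPeepholeEPCallback(
        [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
          if (Level != PassBuilder::O0)
            FPM.addPass(InstCombinePass()); // extra work at every EP site
        });
  }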
void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
MAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
+
+ for (auto &C : ModuleAnalysisRegistrationCallbacks)
+ C(MAM);
}
void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
CGAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
+
+ for (auto &C : CGSCCAnalysisRegistrationCallbacks)
+ C(CGAM);
}
void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
FAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
+
+ for (auto &C : FunctionAnalysisRegistrationCallbacks)
+ C(FAM);
}
void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
LAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
+
+ for (auto &C : LoopAnalysisRegistrationCallbacks)
+ C(LAM);
}
FunctionPassManager
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
- bool DebugLogging) {
+ bool DebugLogging,
+ bool PrepareForThinLTO) {
assert(Level != O0 && "Must request optimizations!");
FunctionPassManager FPM(DebugLogging);
@@ -340,6 +359,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
if (!isOptimizingForSize(Level))
FPM.addPass(LibCallsShrinkWrapPass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
FPM.addPass(TailCallElimPass());
FPM.addPass(SimplifyCFGPass());
@@ -363,11 +384,19 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM1.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(IndVarSimplifyPass());
LPM2.addPass(LoopIdiomRecognizePass());
+
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM2, Level);
+
LPM2.addPass(LoopDeletionPass());
- // FIXME: The old pass manager has a hack to disable loop unrolling during
- // ThinLTO when using sample PGO. Need to either fix it or port some
- // workaround.
- LPM2.addPass(LoopUnrollPass::createFull(Level));
+  // Do not enable unrolling in the PrepareForThinLTO phase during sample PGO
+  // because it changes the IR, making profile annotation in the backend
+  // compilation inaccurate.
+ if (!PrepareForThinLTO || !PGOOpt || PGOOpt->SampleProfileFile.empty())
+ LPM2.addPass(LoopUnrollPass::createFull(Level));
+
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM2, Level);
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
@@ -403,6 +432,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Run instcombine after redundancy and dead bit elimination to exploit
// opportunities opened up by them.
FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
// Re-consider control flow based optimizations after redundancy elimination,
// redo DCE, etc.
@@ -411,19 +441,24 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(DSEPass());
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
FPM.addPass(ADCEPass());
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
return FPM;
}
-static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
- PassBuilder::OptimizationLevel Level,
- bool RunProfileGen, std::string ProfileGenFile,
- std::string ProfileUseFile) {
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
+ PassBuilder::OptimizationLevel Level,
+ bool RunProfileGen,
+ std::string ProfileGenFile,
+ std::string ProfileUseFile) {
  // Generally running simplification passes and the inliner with a high
// threshold results in smaller executables, but there may be cases where
// the size grows, so let's be conservative here and skip this simplification
@@ -448,9 +483,8 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
FPM.addPass(InstCombinePass()); // Combine silly sequences.
+ invokePeepholeEPCallbacks(FPM, Level);
- // FIXME: Here the old pass manager inserts peephole extensions.
- // Add them when they're supported.
CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline)));
@@ -490,7 +524,8 @@ getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
- bool DebugLogging) {
+ bool DebugLogging,
+ bool PrepareForThinLTO) {
ModulePassManager MPM(DebugLogging);
// Do basic inference of function attributes from known properties of system
@@ -530,6 +565,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// optimizations.
FunctionPassManager GlobalCleanupPM(DebugLogging);
GlobalCleanupPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
+
GlobalCleanupPM.addPass(SimplifyCFGPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
@@ -544,8 +581,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile));
    // Indirect call promotion that promotes intra-module targets only.
- MPM.addPass(PGOIndirectCallPromotion(
- false, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
+    // Do not enable it in the PrepareForThinLTO phase during sample PGO
+    // because it changes the IR, making profile annotation in the backend
+    // compilation inaccurate.
+ if (!PrepareForThinLTO || PGOOpt->SampleProfileFile.empty())
+ MPM.addPass(PGOIndirectCallPromotion(
+ false, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
}
// Require the GlobalsAA analysis for the module so we can query it within
@@ -570,7 +610,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Run the inliner first. The theory is that we are walking bottom-up and so
// the callees have already been fully optimized, and we want to inline them
// into the callers so that our optimizations can reflect that.
- MainCGPipeline.addPass(InlinerPass(getInlineParamsFromOptLevel(Level)));
+  // In the PrepareForThinLTO phase, we disable the hot-caller heuristic for
+  // sample PGO because it makes profile annotation in the backend inaccurate.
+ InlineParams IP = getInlineParamsFromOptLevel(Level);
+ if (PrepareForThinLTO && PGOOpt && !PGOOpt->SampleProfileFile.empty())
+ IP.HotCallSiteThreshold = 0;
+ MainCGPipeline.addPass(InlinerPass(IP));
// Now deduce any function attributes based in the current code.
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
@@ -583,7 +628,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Lastly, add the core function simplification pipeline nested inside the
// CGSCC walk.
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- buildFunctionSimplificationPipeline(Level, DebugLogging)));
+ buildFunctionSimplificationPipeline(Level, DebugLogging,
+ PrepareForThinLTO)));
+
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(MainCGPipeline, Level);
// We wrap the CGSCC pipeline in a devirtualization repeater. This will try
// to detect when we devirtualize indirect calls and iterate the SCC passes
@@ -643,6 +692,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// rather than on each loop in an inside-out manner, and so they are actually
// function passes.
+ for (auto &C : VectorizerStartEPCallbacks)
+ C(OptimizePM, Level);
+
// First rotate loops that may have been un-rotated by prior passes.
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
@@ -726,7 +778,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
MPM.addPass(ForceFunctionAttrsPass());
// Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
+ MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging,
+ /*PrepareForThinLTO=*/false));
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
@@ -747,7 +800,8 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
// If we are planning to perform ThinLTO later, we don't bloat the code with
// unrolling/vectorization/... now. Just simplify the module as much as we
// can.
- MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
+ MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging,
+ /*PrepareForThinLTO=*/true));
// Run partial inlining pass to partially inline functions that have
// large bodies.
@@ -785,7 +839,8 @@ PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level,
!PGOOpt->ProfileUseFile.empty()));
// Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
+ MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging,
+ /*PrepareForThinLTO=*/false));
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
@@ -868,8 +923,11 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// simplification opportunities, and both can propagate functions through
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
- // FIXME: add peephole extensions here as the legacy PM does.
- MPM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass()));
+ FunctionPassManager PeepholeFPM(DebugLogging);
+ PeepholeFPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(PeepholeFPM, Level);
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
// Note: historically, the PruneEH pass was run first to deduce nounwind and
// generally clean up exception handling overhead. It isn't clear this is
@@ -887,10 +945,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MPM.addPass(GlobalDCEPass());
FunctionPassManager FPM(DebugLogging);
-
// The IPO Passes may leave cruft around. Clean up after them.
- // FIXME: add peephole extensions here as the legacy PM does.
FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
FPM.addPass(JumpThreadingPass());
// Break up allocas
@@ -937,8 +995,11 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MainFPM.add(AlignmentFromAssumptionsPass());
#endif
- // FIXME: add peephole extensions to the PM here.
+ // FIXME: Conditionally run LoadCombine here, after it's ported
+ // (in case we still have this pass, given its questionable usefulness).
+
MainFPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(MainFPM, Level);
MainFPM.addPass(JumpThreadingPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
@@ -1021,7 +1082,27 @@ static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) {
Name.startswith("lto");
}
-static bool isModulePassName(StringRef Name) {
+/// Tests whether registered callbacks will accept a given pass name.
+///
+/// When parsing a pipeline text, the type of the outermost pipeline may be
+/// omitted, in which case the type is automatically determined from the first
+/// pass name in the text. This may be a name that is handled through one of the
+/// callbacks. We check this through the ordinary parsing callbacks by setting
+/// up a dummy PassManager so that the client is not forced to also handle
+/// this type of query.
+template <typename PassManagerT, typename CallbacksT>
+static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) {
+ if (!Callbacks.empty()) {
+ PassManagerT DummyPM;
+ for (auto &CB : Callbacks)
+ if (CB(Name, DummyPM, {}))
+ return true;
+ }
+ return false;
+}
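The dummy PassManager makes the probe side-effect free: a callback may add passes to it, but the manager is discarded and only the boolean answer is kept. On the registration side (hooks added in PassBuilder.h, not shown here), a client might claim a custom name like this; MyPass is hypothetical:

  #include "llvm/Passes/PassBuilder.h"
  using namespace llvm;

  void registerMyParser(PassBuilder &PB) {
    PB.registerPipelineParsingCallback(
        [](StringRef Name, FunctionPassManager &FPM,
           ArrayRef<PassBuilder::PipelineElement>) {
          if (Name == "my-pass") {
            FPM.addPass(MyPass()); // hypothetical pass, not in tree
            return true;           // claim this name
          }
          return false;            // let other parsers try
        });
  }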
+
+template <typename CallbacksT>
+static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
// Manually handle aliases for pre-configured pipeline fragments.
if (startsWithDefaultPipelineAliasPrefix(Name))
return DefaultAliasRegex.match(Name);
@@ -1046,10 +1127,11 @@ static bool isModulePassName(StringRef Name) {
return true;
#include "PassRegistry.def"
- return false;
+ return callbacksAcceptPassName<ModulePassManager>(Name, Callbacks);
}
-static bool isCGSCCPassName(StringRef Name) {
+template <typename CallbacksT>
+static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "cgscc")
return true;
@@ -1070,10 +1152,11 @@ static bool isCGSCCPassName(StringRef Name) {
return true;
#include "PassRegistry.def"
- return false;
+ return callbacksAcceptPassName<CGSCCPassManager>(Name, Callbacks);
}
-static bool isFunctionPassName(StringRef Name) {
+template <typename CallbacksT>
+static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "function")
return true;
@@ -1092,10 +1175,11 @@ static bool isFunctionPassName(StringRef Name) {
return true;
#include "PassRegistry.def"
- return false;
+ return callbacksAcceptPassName<FunctionPassManager>(Name, Callbacks);
}
-static bool isLoopPassName(StringRef Name) {
+template <typename CallbacksT>
+static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "loop")
return true;
@@ -1112,7 +1196,7 @@ static bool isLoopPassName(StringRef Name) {
return true;
#include "PassRegistry.def"
- return false;
+ return callbacksAcceptPassName<LoopPassManager>(Name, Callbacks);
}
Optional<std::vector<PassBuilder::PipelineElement>>
@@ -1213,6 +1297,11 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM)));
return true;
}
+
+ for (auto &C : ModulePipelineParsingCallbacks)
+ if (C(Name, MPM, InnerPipeline))
+ return true;
+
// Normal passes can't have pipelines.
return false;
}
@@ -1225,12 +1314,12 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
assert(Matches.size() == 3 && "Must capture two matched strings!");
OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
- .Case("O0", O0)
- .Case("O1", O1)
- .Case("O2", O2)
- .Case("O3", O3)
- .Case("Os", Os)
- .Case("Oz", Oz);
+ .Case("O0", O0)
+ .Case("O1", O1)
+ .Case("O2", O2)
+ .Case("O3", O3)
+ .Case("Os", Os)
+ .Case("Oz", Oz);
if (L == O0)
// At O0 we do nothing at all!
return true;
@@ -1270,6 +1359,9 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
}
#include "PassRegistry.def"
+ for (auto &C : ModulePipelineParsingCallbacks)
+ if (C(Name, MPM, InnerPipeline))
+ return true;
return false;
}
@@ -1317,11 +1409,16 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
*MaxRepetitions, DebugLogging));
return true;
}
+
+ for (auto &C : CGSCCPipelineParsingCallbacks)
+ if (C(Name, CGPM, InnerPipeline))
+ return true;
+
// Normal passes can't have pipelines.
return false;
}
- // Now expand the basic registered passes from the .inc file.
+// Now expand the basic registered passes from the .inc file.
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(CREATE_PASS); \
@@ -1342,6 +1439,9 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
}
#include "PassRegistry.def"
+ for (auto &C : CGSCCPipelineParsingCallbacks)
+ if (C(Name, CGPM, InnerPipeline))
+ return true;
return false;
}
@@ -1379,11 +1479,16 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM)));
return true;
}
+
+ for (auto &C : FunctionPipelineParsingCallbacks)
+ if (C(Name, FPM, InnerPipeline))
+ return true;
+
// Normal passes can't have pipelines.
return false;
}
- // Now expand the basic registered passes from the .inc file.
+// Now expand the basic registered passes from the .inc file.
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(CREATE_PASS); \
@@ -1403,6 +1508,9 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
}
#include "PassRegistry.def"
+ for (auto &C : FunctionPipelineParsingCallbacks)
+ if (C(Name, FPM, InnerPipeline))
+ return true;
return false;
}
@@ -1430,11 +1538,16 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM)));
return true;
}
+
+ for (auto &C : LoopPipelineParsingCallbacks)
+ if (C(Name, LPM, InnerPipeline))
+ return true;
+
// Normal passes can't have pipelines.
return false;
}
- // Now expand the basic registered passes from the .inc file.
+// Now expand the basic registered passes from the .inc file.
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
LPM.addPass(CREATE_PASS); \
@@ -1455,6 +1568,9 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
}
#include "PassRegistry.def"
+ for (auto &C : LoopPipelineParsingCallbacks)
+ if (C(Name, LPM, InnerPipeline))
+ return true;
return false;
}
@@ -1473,6 +1589,9 @@ bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
}
#include "PassRegistry.def"
+ for (auto &C : AAParsingCallbacks)
+ if (C(Name, AA))
+ return true;
return false;
}
@@ -1539,7 +1658,7 @@ bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
return true;
}
-// Primary pass pipeline description parsing routine.
+// Primary pass pipeline description parsing routine for a \c ModulePassManager
// FIXME: Should this routine accept a TargetMachine or require the caller to
// pre-populate the analysis managers with target-specific stuff?
bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
@@ -1553,21 +1672,70 @@ bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
// automatically.
StringRef FirstName = Pipeline->front().Name;
- if (!isModulePassName(FirstName)) {
- if (isCGSCCPassName(FirstName))
+ if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) {
+ if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) {
Pipeline = {{"cgscc", std::move(*Pipeline)}};
- else if (isFunctionPassName(FirstName))
+ } else if (isFunctionPassName(FirstName,
+ FunctionPipelineParsingCallbacks)) {
Pipeline = {{"function", std::move(*Pipeline)}};
- else if (isLoopPassName(FirstName))
+ } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) {
Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}};
- else
+ } else {
+ for (auto &C : TopLevelPipelineParsingCallbacks)
+ if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging))
+ return true;
+
// Unknown pass name!
return false;
+ }
}
return parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging);
}
+// Primary pass pipeline description parsing routine for a \c CGSCCPassManager
+bool PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
+ StringRef PipelineText, bool VerifyEachPass,
+ bool DebugLogging) {
+ auto Pipeline = parsePipelineText(PipelineText);
+ if (!Pipeline || Pipeline->empty())
+ return false;
+
+ StringRef FirstName = Pipeline->front().Name;
+ if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks))
+ return false;
+
+ return parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging);
+}
+
+// Primary pass pipeline description parsing routine for a \c
+// FunctionPassManager
+bool PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
+ StringRef PipelineText, bool VerifyEachPass,
+ bool DebugLogging) {
+ auto Pipeline = parsePipelineText(PipelineText);
+ if (!Pipeline || Pipeline->empty())
+ return false;
+
+ StringRef FirstName = Pipeline->front().Name;
+ if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks))
+ return false;
+
+ return parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass,
+ DebugLogging);
+}
+
+// Primary pass pipeline description parsing routine for a \c LoopPassManager
+bool PassBuilder::parsePassPipeline(LoopPassManager &LPM,
+ StringRef PipelineText, bool VerifyEachPass,
+ bool DebugLogging) {
+ auto Pipeline = parsePipelineText(PipelineText);
+ if (!Pipeline || Pipeline->empty())
+ return false;
+
+  return parseLoopPassPipeline(LPM, *Pipeline, VerifyEachPass, DebugLogging);
+}
+
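With these overloads a tool can parse pipeline text straight into a narrower pass manager instead of always going through a ModulePassManager. A sketch, assuming the usual FUNCTION_PASS names from PassRegistry.def:

  #include "llvm/Passes/PassBuilder.h"
  using namespace llvm;

  bool buildMyFunctionPipeline(PassBuilder &PB, FunctionPassManager &FPM) {
    // Both names are ordinary FUNCTION_PASS entries, so isFunctionPassName
    // accepts the text and no module wrapping is needed.
    return PB.parsePassPipeline(FPM, "instcombine,simplify-cfg",
                                /*VerifyEachPass=*/false,
                                /*DebugLogging=*/false);
  }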
bool PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
// If the pipeline just consists of the word 'default' just replace the AA
// manager with our default one.
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index a1d18724fcd57..48c1643cb13c8 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -460,9 +460,9 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
return Error::success();
}
-void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE,
- InstrProfValueSiteRecord &Input,
- uint64_t Weight) {
+void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
+ uint64_t Weight,
+ function_ref<void(instrprof_error)> Warn) {
this->sortByTargetValues();
Input.sortByTargetValues();
auto I = ValueData.begin();
@@ -475,7 +475,7 @@ void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE,
bool Overflowed;
I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed);
if (Overflowed)
- SIPE.addError(instrprof_error::counter_overflow);
+ Warn(instrprof_error::counter_overflow);
++I;
continue;
}
@@ -483,25 +483,25 @@ void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE,
}
}
-void InstrProfValueSiteRecord::scale(SoftInstrProfErrors &SIPE,
- uint64_t Weight) {
+void InstrProfValueSiteRecord::scale(uint64_t Weight,
+ function_ref<void(instrprof_error)> Warn) {
for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) {
bool Overflowed;
I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed);
if (Overflowed)
- SIPE.addError(instrprof_error::counter_overflow);
+ Warn(instrprof_error::counter_overflow);
}
}
// Merge Value Profile data from Src record to this record for ValueKind.
// Scale merged value counts by \p Weight.
-void InstrProfRecord::mergeValueProfData(uint32_t ValueKind,
- InstrProfRecord &Src,
- uint64_t Weight) {
+void InstrProfRecord::mergeValueProfData(
+ uint32_t ValueKind, InstrProfRecord &Src, uint64_t Weight,
+ function_ref<void(instrprof_error)> Warn) {
uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind);
if (ThisNumValueSites != OtherNumValueSites) {
- SIPE.addError(instrprof_error::value_site_count_mismatch);
+ Warn(instrprof_error::value_site_count_mismatch);
return;
}
if (!ThisNumValueSites)
@@ -511,14 +511,15 @@ void InstrProfRecord::mergeValueProfData(uint32_t ValueKind,
MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
Src.getValueSitesForKind(ValueKind);
for (uint32_t I = 0; I < ThisNumValueSites; I++)
- ThisSiteRecords[I].merge(SIPE, OtherSiteRecords[I], Weight);
+ ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight, Warn);
}
-void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) {
+void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight,
+ function_ref<void(instrprof_error)> Warn) {
// If the number of counters doesn't match we either have bad data
// or a hash collision.
if (Counts.size() != Other.Counts.size()) {
- SIPE.addError(instrprof_error::count_mismatch);
+ Warn(instrprof_error::count_mismatch);
return;
}
@@ -527,27 +528,30 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) {
Counts[I] =
SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed);
if (Overflowed)
- SIPE.addError(instrprof_error::counter_overflow);
+ Warn(instrprof_error::counter_overflow);
}
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
- mergeValueProfData(Kind, Other, Weight);
+ mergeValueProfData(Kind, Other, Weight, Warn);
}
-void InstrProfRecord::scaleValueProfData(uint32_t ValueKind, uint64_t Weight) {
+void InstrProfRecord::scaleValueProfData(
+ uint32_t ValueKind, uint64_t Weight,
+ function_ref<void(instrprof_error)> Warn) {
for (auto &R : getValueSitesForKind(ValueKind))
- R.scale(SIPE, Weight);
+ R.scale(Weight, Warn);
}
-void InstrProfRecord::scale(uint64_t Weight) {
+void InstrProfRecord::scale(uint64_t Weight,
+ function_ref<void(instrprof_error)> Warn) {
for (auto &Count : this->Counts) {
bool Overflowed;
Count = SaturatingMultiply(Count, Weight, &Overflowed);
if (Overflowed)
- SIPE.addError(instrprof_error::counter_overflow);
+ Warn(instrprof_error::counter_overflow);
}
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
- scaleValueProfData(Kind, Weight);
+ scaleValueProfData(Kind, Weight, Warn);
}
// Map indirect call target name hash to name string.
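The function_ref plumbing replaces the old SoftInstrProfErrors accumulator; callers now decide on the spot what to do with a soft error. A sketch of a caller, mirroring the MapWarn lambda the writer uses later in this patch:

  #include "llvm/ProfileData/InstrProf.h"
  #include "llvm/Support/Error.h"
  using namespace llvm;

  static void mergeWithWarnings(InstrProfRecord &Dst, InstrProfRecord &Src) {
    Dst.merge(Src, /*Weight=*/1, [](instrprof_error E) {
      // E is e.g. count_mismatch or counter_overflow; wrapped and discarded
      // here, a real caller would log or count these.
      consumeError(make_error<InstrProfError>(E));
    });
  }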
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index 1ed1fb8b6f0b5..1b39a0695aac6 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -221,7 +221,7 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
#undef VP_READ_ADVANCE
}
-Error TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
+Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
// Skip empty lines and comments.
while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
++Line;
@@ -377,13 +377,13 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
}
template <class IntPtrT>
-Error RawInstrProfReader<IntPtrT>::readName(InstrProfRecord &Record) {
+Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
Record.Name = getName(Data->NameRef);
return success();
}
template <class IntPtrT>
-Error RawInstrProfReader<IntPtrT>::readFuncHash(InstrProfRecord &Record) {
+Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
Record.Hash = swap(Data->FuncHash);
return success();
}
@@ -445,7 +445,7 @@ Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
}
template <class IntPtrT>
-Error RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
+Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
if (atEnd())
// At this point, ValueDataStart field points to the next header.
if (Error E = readNextHeader(getNextHeaderPos()))
@@ -550,7 +550,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
template <typename HashTableImpl>
Error InstrProfReaderIndex<HashTableImpl>::getRecords(
- StringRef FuncName, ArrayRef<InstrProfRecord> &Data) {
+ StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
auto Iter = HashTable->find(FuncName);
if (Iter == HashTable->end())
return make_error<InstrProfError>(instrprof_error::unknown_function);
@@ -564,7 +564,7 @@ Error InstrProfReaderIndex<HashTableImpl>::getRecords(
template <typename HashTableImpl>
Error InstrProfReaderIndex<HashTableImpl>::getRecords(
- ArrayRef<InstrProfRecord> &Data) {
+ ArrayRef<NamedInstrProfRecord> &Data) {
if (atEnd())
return make_error<InstrProfError>(instrprof_error::eof);
@@ -644,7 +644,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
// FIXME: This only computes an empty summary. Need to call addRecord for
- // all InstrProfRecords to get the correct summary.
+ // all NamedInstrProfRecords to get the correct summary.
this->Summary = Builder.getSummary();
return Cur;
}
@@ -707,7 +707,7 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
Expected<InstrProfRecord>
IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash) {
- ArrayRef<InstrProfRecord> Data;
+ ArrayRef<NamedInstrProfRecord> Data;
Error Err = Index->getRecords(FuncName, Data);
if (Err)
return std::move(Err);
@@ -732,10 +732,10 @@ Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
return success();
}
-Error IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
+Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
static unsigned RecordIndex = 0;
- ArrayRef<InstrProfRecord> Data;
+ ArrayRef<NamedInstrProfRecord> Data;
Error E = Index->getRecords(Data);
if (E)
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 9efea78ed2a89..ce3f8806e12e7 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -176,38 +176,46 @@ void InstrProfWriter::setOutputSparse(bool Sparse) {
this->Sparse = Sparse;
}
-Error InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) {
- auto &ProfileDataMap = FunctionData[I.Name];
+void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
+ function_ref<void(Error)> Warn) {
+ auto Name = I.Name;
+ auto Hash = I.Hash;
+ addRecord(Name, Hash, std::move(I), Weight, Warn);
+}
+
+void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
+ InstrProfRecord &&I, uint64_t Weight,
+ function_ref<void(Error)> Warn) {
+ auto &ProfileDataMap = FunctionData[Name];
bool NewFunc;
ProfilingData::iterator Where;
std::tie(Where, NewFunc) =
- ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord()));
+ ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
InstrProfRecord &Dest = Where->second;
+ auto MapWarn = [&](instrprof_error E) {
+ Warn(make_error<InstrProfError>(E));
+ };
+
if (NewFunc) {
// We've never seen a function with this name and hash, add it.
Dest = std::move(I);
- // Fix up the name to avoid dangling reference.
- Dest.Name = FunctionData.find(Dest.Name)->getKey();
if (Weight > 1)
- Dest.scale(Weight);
+ Dest.scale(Weight, MapWarn);
} else {
// We're updating a function we've seen before.
- Dest.merge(I, Weight);
+ Dest.merge(I, Weight, MapWarn);
}
Dest.sortValueData();
-
- return Dest.takeError();
}
-Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) {
+void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
+ function_ref<void(Error)> Warn) {
for (auto &I : IPW.FunctionData)
for (auto &Func : I.getValue())
- if (Error E = addRecord(std::move(Func.second), 1))
- return E;
- return Error::success();
+ addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);
}
bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
@@ -323,11 +331,12 @@ static const char *ValueProfKindStr[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
-void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func,
+void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
+ const InstrProfRecord &Func,
InstrProfSymtab &Symtab,
raw_fd_ostream &OS) {
- OS << Func.Name << "\n";
- OS << "# Func Hash:\n" << Func.Hash << "\n";
+ OS << Name << "\n";
+ OS << "# Func Hash:\n" << Hash << "\n";
OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
OS << "# Counter Values:\n";
for (uint64_t Count : Func.Counts)
@@ -375,6 +384,6 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
for (const auto &I : FunctionData)
if (shouldEncodeData(I.getValue()))
for (const auto &Func : I.getValue())
- writeRecordInText(Func.second, Symtab, OS);
+ writeRecordInText(I.getKey(), Func.first, Func.second, Symtab, OS);
return Error::success();
}
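Taken together, the writer now funnels every soft error through one callback instead of aborting the merge on the first failure. A sketch of bulk insertion with a logging handler:

  #include "llvm/ProfileData/InstrProfWriter.h"
  #include "llvm/Support/raw_ostream.h"
  #include <vector>
  using namespace llvm;

  static void addAll(InstrProfWriter &Writer,
                     std::vector<NamedInstrProfRecord> &Records) {
    for (auto &R : Records)
      Writer.addRecord(std::move(R), /*Weight=*/1, [](Error E) {
        logAllUnhandledErrors(std::move(E), errs(), "profile warning: ");
      });
  }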
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 0345a5e3d2a1d..50173f5256bff 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -1236,7 +1236,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
<< ": Not enough positional command line arguments specified!\n"
<< "Must specify at least " << NumPositionalRequired
<< " positional argument" << (NumPositionalRequired > 1 ? "s" : "")
- << ": See: " << argv[0] << " - help\n";
+ << ": See: " << argv[0] << " -help\n";
ErrorParsing = true;
} else if (!HasUnlimitedPositionals &&
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 9398789cea871..d8422115eae81 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/DynamicLibrary.h"
#include "llvm-c/Support.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Config/config.h"
#include "llvm/Support/ManagedStatic.h"
@@ -73,19 +74,37 @@ public:
return true;
}
- void *Lookup(const char *Symbol) {
- // Process handle gets first try.
+ void *LibLookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) {
+ if (Order & SO_LoadOrder) {
+ for (void *Handle : Handles) {
+ if (void *Ptr = DLSym(Handle, Symbol))
+ return Ptr;
+ }
+ } else {
+ for (void *Handle : llvm::reverse(Handles)) {
+ if (void *Ptr = DLSym(Handle, Symbol))
+ return Ptr;
+ }
+ }
+ return nullptr;
+ }
+
+ void *Lookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) {
+ assert(!((Order & SO_LoadedFirst) && (Order & SO_LoadedLast)) &&
+ "Invalid Ordering");
+
+ if (!Process || (Order & SO_LoadedFirst)) {
+ if (void *Ptr = LibLookup(Symbol, Order))
+ return Ptr;
+ }
if (Process) {
+ // Use OS facilities to search the current binary and all loaded libs.
if (void *Ptr = DLSym(Process, Symbol))
return Ptr;
-#ifndef NDEBUG
- for (void *Handle : Handles)
- assert(!DLSym(Handle, Symbol) && "Symbol exists in non process handle");
-#endif
- } else {
- // Iterate in reverse, so newer libraries/symbols override older.
- for (auto &&I = Handles.rbegin(), E = Handles.rend(); I != E; ++I) {
- if (void *Ptr = DLSym(*I, Symbol))
+
+ // Search any libs that might have been skipped because of RTLD_LOCAL.
+ if (Order & SO_LoadedLast) {
+ if (void *Ptr = LibLookup(Symbol, Order))
return Ptr;
}
}
@@ -113,6 +132,8 @@ static llvm::ManagedStatic<llvm::sys::SmartMutex<true>> SymbolsMutex;
#endif
char DynamicLibrary::Invalid;
+DynamicLibrary::SearchOrdering DynamicLibrary::SearchOrder =
+ DynamicLibrary::SO_Linker;
namespace llvm {
void *SearchForAddressOfSpecialSymbol(const char *SymbolName) {
@@ -170,7 +191,7 @@ void *DynamicLibrary::SearchForAddressOfSymbol(const char *SymbolName) {
// Now search the libraries.
if (OpenedHandles.isConstructed()) {
- if (void *Ptr = OpenedHandles->Lookup(SymbolName))
+ if (void *Ptr = OpenedHandles->Lookup(SymbolName, SearchOrder))
return Ptr;
}
}
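SearchOrder is process-global state, so a host that wants symbols from explicitly loaded libraries to shadow the process image flips it once before doing lookups. Sketch:

  #include "llvm/Support/DynamicLibrary.h"

  void *lookupPreferLoaded(const char *Name) {
    using llvm::sys::DynamicLibrary;
    // SO_Linker (the default) keeps the historical process-first order.
    DynamicLibrary::SearchOrder = DynamicLibrary::SO_LoadedFirst;
    return DynamicLibrary::SearchForAddressOfSymbol(Name);
  }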
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index a7d3a18003eee..fe69151665c68 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -20,15 +20,14 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/WindowsError.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdlib>
+#include <mutex>
+#include <new>
#if defined(HAVE_UNISTD_H)
# include <unistd.h>
@@ -43,18 +42,25 @@ using namespace llvm;
static fatal_error_handler_t ErrorHandler = nullptr;
static void *ErrorHandlerUserData = nullptr;
-static ManagedStatic<sys::Mutex> ErrorHandlerMutex;
+static fatal_error_handler_t BadAllocErrorHandler = nullptr;
+static void *BadAllocErrorHandlerUserData = nullptr;
+
+// Mutexes to synchronize installing error handlers and calling error handlers.
+// Do not use ManagedStatic, as that may allocate memory while attempting to
+// report an OOM.
+static std::mutex ErrorHandlerMutex;
+static std::mutex BadAllocErrorHandlerMutex;
void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
void *user_data) {
- llvm::MutexGuard Lock(*ErrorHandlerMutex);
+ std::lock_guard<std::mutex> Lock(ErrorHandlerMutex);
assert(!ErrorHandler && "Error handler already registered!\n");
ErrorHandler = handler;
ErrorHandlerUserData = user_data;
}
void llvm::remove_fatal_error_handler() {
- llvm::MutexGuard Lock(*ErrorHandlerMutex);
+ std::lock_guard<std::mutex> Lock(ErrorHandlerMutex);
ErrorHandler = nullptr;
ErrorHandlerUserData = nullptr;
}
@@ -77,7 +83,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
{
// Only acquire the mutex while reading the handler, so as not to invoke a
// user-supplied callback under a lock.
- llvm::MutexGuard Lock(*ErrorHandlerMutex);
+ std::lock_guard<std::mutex> Lock(ErrorHandlerMutex);
handler = ErrorHandler;
handlerData = ErrorHandlerUserData;
}
@@ -104,6 +110,48 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
exit(1);
}
+void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler,
+ void *user_data) {
+ std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex);
+  assert(!BadAllocErrorHandler &&
+         "Bad alloc error handler already registered!\n");
+ BadAllocErrorHandler = handler;
+ BadAllocErrorHandlerUserData = user_data;
+}
+
+void llvm::remove_bad_alloc_error_handler() {
+ std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex);
+ BadAllocErrorHandler = nullptr;
+ BadAllocErrorHandlerUserData = nullptr;
+}
+
+void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) {
+ fatal_error_handler_t Handler = nullptr;
+ void *HandlerData = nullptr;
+ {
+ // Only acquire the mutex while reading the handler, so as not to invoke a
+ // user-supplied callback under a lock.
+ std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex);
+ Handler = BadAllocErrorHandler;
+ HandlerData = BadAllocErrorHandlerUserData;
+ }
+
+ if (Handler) {
+ Handler(HandlerData, Reason, GenCrashDiag);
+ llvm_unreachable("bad alloc handler should not return");
+ }
+
+#ifdef LLVM_ENABLE_EXCEPTIONS
+ // If exceptions are enabled, make OOM in malloc look like OOM in new.
+ throw std::bad_alloc();
+#else
+  // Don't call the normal error handler. It may allocate memory. Directly
+  // write an OOM message to stderr and abort.
+ char OOMMessage[] = "LLVM ERROR: out of memory\n";
+ (void)::write(2, OOMMessage, strlen(OOMMessage));
+ abort();
+#endif
+}
+
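Installing the new handler looks like the sketch below. It assumes fatal_error_handler_t's existing signature (void *, const std::string &, bool); the one hard rule is that the handler must not allocate:

  #include "llvm/Support/ErrorHandling.h"
  #include <cstdlib>
  #include <string>

  static void onBadAlloc(void *UserData, const std::string &Reason,
                         bool GenCrashDiag) {
    (void)UserData;
    (void)Reason;
    (void)GenCrashDiag;
    // No allocation allowed here; a real handler would write a fixed
    // message with a raw syscall before terminating.
    std::abort();
  }

  void installOOMHandler() {
    llvm::install_bad_alloc_error_handler(onBadAlloc, /*user_data=*/nullptr);
  }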
void llvm::llvm_unreachable_internal(const char *msg, const char *file,
unsigned line) {
// This code intentionally doesn't call the ErrorHandler callback, because
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 232efe648b030..9f22f89b3c9ef 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -281,11 +281,17 @@ enum ProcessorVendors {
};
enum ProcessorTypes {
- INTEL_ATOM = 1,
+ INTEL_BONNELL = 1,
INTEL_CORE2,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
+ INTEL_SILVERMONT,
+ INTEL_KNL,
+ AMD_BTVER1,
+ AMD_BTVER2,
+ AMDFAM17H,
+ // Entries below this are not in libgcc/compiler-rt.
INTEL_i386,
INTEL_i486,
INTEL_PENTIUM,
@@ -295,16 +301,13 @@ enum ProcessorTypes {
INTEL_PENTIUM_IV,
INTEL_PENTIUM_M,
INTEL_CORE_DUO,
- INTEL_XEONPHI,
INTEL_X86_64,
INTEL_NOCONA,
INTEL_PRESCOTT,
AMD_i486,
AMDPENTIUM,
AMDATHLON,
- AMDFAM14H,
- AMDFAM16H,
- AMDFAM17H,
+ INTEL_GOLDMONT,
CPU_TYPE_MAX
};
@@ -317,34 +320,26 @@ enum ProcessorSubtypes {
AMDFAM10H_ISTANBUL,
AMDFAM15H_BDVER1,
AMDFAM15H_BDVER2,
- INTEL_PENTIUM_MMX,
- INTEL_CORE2_65,
- INTEL_CORE2_45,
+ AMDFAM15H_BDVER3,
+ AMDFAM15H_BDVER4,
+ AMDFAM17H_ZNVER1,
INTEL_COREI7_IVYBRIDGE,
INTEL_COREI7_HASWELL,
INTEL_COREI7_BROADWELL,
INTEL_COREI7_SKYLAKE,
INTEL_COREI7_SKYLAKE_AVX512,
- INTEL_ATOM_BONNELL,
- INTEL_ATOM_SILVERMONT,
- INTEL_ATOM_GOLDMONT,
- INTEL_KNIGHTS_LANDING,
+ // Entries below this are not in libgcc/compiler-rt.
+ INTEL_PENTIUM_MMX,
+ INTEL_CORE2_65,
+ INTEL_CORE2_45,
AMDPENTIUM_K6,
AMDPENTIUM_K62,
AMDPENTIUM_K63,
AMDPENTIUM_GEODE,
- AMDATHLON_TBIRD,
- AMDATHLON_MP,
+ AMDATHLON_CLASSIC,
AMDATHLON_XP,
+ AMDATHLON_K8,
AMDATHLON_K8SSE3,
- AMDATHLON_OPTERON,
- AMDATHLON_FX,
- AMDATHLON_64,
- AMD_BTVER1,
- AMD_BTVER2,
- AMDFAM15H_BDVER3,
- AMDFAM15H_BDVER4,
- AMDFAM17H_ZNVER1,
CPU_SUBTYPE_MAX
};
@@ -360,9 +355,28 @@ enum ProcessorFeatures {
FEATURE_SSE4_2,
FEATURE_AVX,
FEATURE_AVX2,
- FEATURE_AVX512,
- FEATURE_AVX512SAVE,
- FEATURE_MOVBE,
+ FEATURE_SSE4_A,
+ FEATURE_FMA4,
+ FEATURE_XOP,
+ FEATURE_FMA,
+ FEATURE_AVX512F,
+ FEATURE_BMI,
+ FEATURE_BMI2,
+ FEATURE_AES,
+ FEATURE_PCLMUL,
+ FEATURE_AVX512VL,
+ FEATURE_AVX512BW,
+ FEATURE_AVX512DQ,
+ FEATURE_AVX512CD,
+ FEATURE_AVX512ER,
+ FEATURE_AVX512PF,
+ FEATURE_AVX512VBMI,
+ FEATURE_AVX512IFMA,
+ FEATURE_AVX5124VNNIW,
+ FEATURE_AVX5124FMAPS,
+ FEATURE_AVX512VPOPCNTDQ,
+  // Only one free bit left in the first 32 features.
+ FEATURE_MOVBE = 32,
FEATURE_ADX,
FEATURE_EM64T
};
@@ -406,7 +420,6 @@ static bool isCpuIdSupported() {
/// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
-#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
@@ -416,14 +429,16 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
"xchgq\t%%rbx, %%rsi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value));
+ return false;
#elif defined(__i386__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value));
+ return false;
#else
- assert(0 && "This method is defined only for x86.");
+ return true;
#endif
#elif defined(_MSC_VER)
// The MSVC intrinsic is portable across x86 and x64.
@@ -433,7 +448,6 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
-#endif
return false;
#else
return true;
@@ -446,16 +460,16 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
unsigned *rEDX) {
-#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#if defined(__x86_64__) || defined(_M_X64)
#if defined(__GNUC__) || defined(__clang__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
// FIXME: should we save this for Clang?
__asm__("movq\t%%rbx, %%rsi\n\t"
"cpuid\n\t"
"xchgq\t%%rbx, %%rsi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
+ return false;
#elif defined(_MSC_VER)
int registers[4];
__cpuidex(registers, value, subleaf);
@@ -463,6 +477,9 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
+ return false;
+#else
+ return true;
#endif
#elif defined(__i386__) || defined(_M_IX86)
#if defined(__GNUC__) || defined(__clang__)
@@ -471,6 +488,7 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
+ return false;
#elif defined(_MSC_VER)
__asm {
mov eax,value
@@ -485,16 +503,16 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
mov esi,rEDX
mov dword ptr [esi],edx
}
-#endif
+ return false;
#else
- assert(0 && "This method is defined only for x86.");
+ return true;
#endif
- return false;
#else
return true;
#endif
}
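For orientation, a sketch of how the helpers above combine downstream (the actual consumer is sys::getHostCPUName, later in this file), valid if pasted into this file:

  static void sketchFamilyModel() {
    unsigned EAX, EBX, ECX, EDX;
    // Leaf 0x1 returns family/model/stepping in EAX.
    if (getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
      return; // cpuid unavailable on this host
    unsigned Family = 0, Model = 0;
    detectX86FamilyModel(EAX, &Family, &Model);
    // Family/Model then select a branch in getIntelProcessorTypeAndSubtype.
  }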
+// Read extended control register 0 (XCR0); used to detect features such as
+// AVX.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
// Check xgetbv; this uses a .byte sequence instead of the instruction
@@ -526,9 +544,10 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
}
static void
-getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
- unsigned int Brand_id, unsigned int Features,
- unsigned *Type, unsigned *Subtype) {
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ unsigned Brand_id, unsigned Features,
+ unsigned Features2, unsigned *Type,
+ unsigned *Subtype) {
if (Brand_id != 0)
return;
switch (Family) {
@@ -681,12 +700,7 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
// Skylake Xeon:
case 0x55:
*Type = INTEL_COREI7;
- // Check that we really have AVX512
- if (Features & (1 << FEATURE_AVX512)) {
- *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
- } else {
- *Subtype = INTEL_COREI7_SKYLAKE; // "skylake"
- }
+ *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
break;
case 0x1c: // Most 45 nm Intel Atom processors
@@ -694,8 +708,7 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
case 0x27: // 32 nm Atom Medfield
case 0x35: // 32 nm Atom Midview
case 0x36: // 32 nm Atom Midview
- *Type = INTEL_ATOM;
- *Subtype = INTEL_ATOM_BONNELL;
+ *Type = INTEL_BONNELL;
break; // "bonnell"
// Atom Silvermont codes from the Intel software optimization guide.
@@ -705,27 +718,23 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
case 0x5a:
case 0x5d:
case 0x4c: // really airmont
- *Type = INTEL_ATOM;
- *Subtype = INTEL_ATOM_SILVERMONT;
+ *Type = INTEL_SILVERMONT;
break; // "silvermont"
// Goldmont:
case 0x5c:
case 0x5f:
- *Type = INTEL_ATOM;
- *Subtype = INTEL_ATOM_GOLDMONT;
+ *Type = INTEL_GOLDMONT;
break; // "goldmont"
case 0x57:
- *Type = INTEL_XEONPHI; // knl
- *Subtype = INTEL_KNIGHTS_LANDING;
+ *Type = INTEL_KNL; // knl
break;
default: // Unknown family 6 CPU, try to guess.
- if (Features & (1 << FEATURE_AVX512)) {
- *Type = INTEL_XEONPHI; // knl
- *Subtype = INTEL_KNIGHTS_LANDING;
+ if (Features & (1 << FEATURE_AVX512F)) {
+ *Type = INTEL_KNL; // knl
break;
}
- if (Features & (1 << FEATURE_ADX)) {
+ if (Features2 & (1 << (FEATURE_ADX - 32))) {
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_BROADWELL;
break;
@@ -741,9 +750,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
break;
}
if (Features & (1 << FEATURE_SSE4_2)) {
- if (Features & (1 << FEATURE_MOVBE)) {
- *Type = INTEL_ATOM;
- *Subtype = INTEL_ATOM_SILVERMONT;
+ if (Features2 & (1 << (FEATURE_MOVBE - 32))) {
+ *Type = INTEL_SILVERMONT;
} else {
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_NEHALEM;
@@ -756,16 +764,15 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
break;
}
if (Features & (1 << FEATURE_SSSE3)) {
- if (Features & (1 << FEATURE_MOVBE)) {
- *Type = INTEL_ATOM;
- *Subtype = INTEL_ATOM_BONNELL; // "bonnell"
+ if (Features2 & (1 << (FEATURE_MOVBE - 32))) {
+ *Type = INTEL_BONNELL; // "bonnell"
} else {
*Type = INTEL_CORE2; // "core2"
*Subtype = INTEL_CORE2_65;
}
break;
}
- if (Features & (1 << FEATURE_EM64T)) {
+ if (Features2 & (1 << (FEATURE_EM64T - 32))) {
*Type = INTEL_X86_64;
break; // x86-64
}
@@ -796,8 +803,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
// Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
// processor, and Mobile Intel Celeron processor. All processors
// are model 02h and manufactured using the 0.13 micron process.
- *Type =
- ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
+ *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64
+ : INTEL_PENTIUM_IV);
break;
case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
@@ -811,13 +818,13 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
// Extreme Edition, Intel Xeon processor, Intel Xeon processor
// MP, Intel Celeron D processor. All processors are model 06h
// and manufactured using the 65 nm process.
- *Type =
- ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
+ *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_NOCONA
+ : INTEL_PRESCOTT);
break;
default:
- *Type =
- ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
+ *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64
+ : INTEL_PENTIUM_IV);
break;
}
break;
@@ -827,10 +834,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
}
}
-static void getAMDProcessorTypeAndSubtype(unsigned int Family,
- unsigned int Model,
- unsigned int Features,
- unsigned *Type,
+static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ unsigned Features, unsigned *Type,
unsigned *Subtype) {
// FIXME: this poorly matches the generated SubtargetFeatureKV table. There
// appears to be no way to generate the wide variety of AMD-specific targets
@@ -860,38 +865,20 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family,
break;
case 6:
*Type = AMDATHLON;
- switch (Model) {
- case 4:
- *Subtype = AMDATHLON_TBIRD;
- break; // "athlon-tbird"
- case 6:
- case 7:
- case 8:
- *Subtype = AMDATHLON_MP;
- break; // "athlon-mp"
- case 10:
+ if (Features & (1 << FEATURE_SSE)) {
*Subtype = AMDATHLON_XP;
break; // "athlon-xp"
}
- break;
+ *Subtype = AMDATHLON_CLASSIC;
+ break; // "athlon"
case 15:
*Type = AMDATHLON;
if (Features & (1 << FEATURE_SSE3)) {
*Subtype = AMDATHLON_K8SSE3;
break; // "k8-sse3"
}
- switch (Model) {
- case 1:
- *Subtype = AMDATHLON_OPTERON;
- break; // "opteron"
- case 5:
- *Subtype = AMDATHLON_FX;
- break; // "athlon-fx"; also opteron
- default:
- *Subtype = AMDATHLON_64;
- break; // "athlon64"
- }
- break;
+ *Subtype = AMDATHLON_K8;
+ break; // "k8"
case 16:
*Type = AMDFAM10H; // "amdfam10"
switch (Model) {
@@ -907,19 +894,13 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family,
}
break;
case 20:
- *Type = AMDFAM14H;
- *Subtype = AMD_BTVER1;
+ *Type = AMD_BTVER1;
break; // "btver1";
case 21:
*Type = AMDFAM15H;
- if (!(Features &
- (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
- *Subtype = AMD_BTVER1;
- break; // "btver1"
- }
- if (Model >= 0x50 && Model <= 0x6f) {
+ if (Model >= 0x60 && Model <= 0x7f) {
*Subtype = AMDFAM15H_BDVER4;
- break; // "bdver4"; 50h-6Fh: Excavator
+ break; // "bdver4"; 60h-7Fh: Excavator
}
if (Model >= 0x30 && Model <= 0x3f) {
*Subtype = AMDFAM15H_BDVER3;
@@ -935,39 +916,52 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family,
}
break;
case 22:
- *Type = AMDFAM16H;
- if (!(Features &
- (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback.
- *Subtype = AMD_BTVER1;
- break; // "btver1";
- }
- *Subtype = AMD_BTVER2;
+ *Type = AMD_BTVER2;
break; // "btver2"
case 23:
*Type = AMDFAM17H;
- if (Features & (1 << FEATURE_ADX)) {
- *Subtype = AMDFAM17H_ZNVER1;
- break; // "znver1"
- }
- *Subtype = AMD_BTVER1;
+ *Subtype = AMDFAM17H_ZNVER1;
break;
default:
break; // "generic"
}
}
-static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
- unsigned MaxLeaf) {
+static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
+ unsigned *FeaturesOut,
+ unsigned *Features2Out) {
unsigned Features = 0;
- unsigned int EAX, EBX;
- Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
- Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
- Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
- Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
- Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
- Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
- Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
- Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
+ unsigned Features2 = 0;
+ unsigned EAX, EBX;
+
+ if ((EDX >> 15) & 1)
+ Features |= 1 << FEATURE_CMOV;
+ if ((EDX >> 23) & 1)
+ Features |= 1 << FEATURE_MMX;
+ if ((EDX >> 25) & 1)
+ Features |= 1 << FEATURE_SSE;
+ if ((EDX >> 26) & 1)
+ Features |= 1 << FEATURE_SSE2;
+
+ if ((ECX >> 0) & 1)
+ Features |= 1 << FEATURE_SSE3;
+ if ((ECX >> 1) & 1)
+ Features |= 1 << FEATURE_PCLMUL;
+ if ((ECX >> 9) & 1)
+ Features |= 1 << FEATURE_SSSE3;
+ if ((ECX >> 12) & 1)
+ Features |= 1 << FEATURE_FMA;
+ if ((ECX >> 19) & 1)
+ Features |= 1 << FEATURE_SSE4_1;
+ if ((ECX >> 20) & 1)
+ Features |= 1 << FEATURE_SSE4_2;
+ if ((ECX >> 23) & 1)
+ Features |= 1 << FEATURE_POPCNT;
+ if ((ECX >> 25) & 1)
+ Features |= 1 << FEATURE_AES;
+
+ if ((ECX >> 22) & 1)
+ Features2 |= 1 << (FEATURE_MOVBE - 32);
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
@@ -976,20 +970,65 @@ static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
((EAX & 0x6) == 0x6);
bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+
+ if (HasAVX)
+ Features |= 1 << FEATURE_AVX;
+
bool HasLeaf7 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
- bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
- bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
- bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
- Features |= (HasAVX << FEATURE_AVX);
- Features |= (HasAVX2 << FEATURE_AVX2);
- Features |= (HasAVX512 << FEATURE_AVX512);
- Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
- Features |= (HasADX << FEATURE_ADX);
-
- getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
- return Features;
+
+ if (HasLeaf7 && ((EBX >> 3) & 1))
+ Features |= 1 << FEATURE_BMI;
+ if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
+ Features |= 1 << FEATURE_AVX2;
+ if (HasLeaf7 && ((EBX >> 9) & 1))
+ Features |= 1 << FEATURE_BMI2;
+ if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512F;
+ if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512DQ;
+ if (HasLeaf7 && ((EBX >> 19) & 1))
+ Features2 |= 1 << (FEATURE_ADX - 32);
+ if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512IFMA;
+ if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512PF;
+ if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512ER;
+ if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512CD;
+ if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512BW;
+ if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512VL;
+
+ if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512VBMI;
+ if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
+
+ if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX5124VNNIW;
+ if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX5124FMAPS;
+
+ unsigned MaxExtLevel;
+ getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+ bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+ !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if (HasExtLeaf1 && ((ECX >> 6) & 1))
+ Features |= 1 << FEATURE_SSE4_A;
+ if (HasExtLeaf1 && ((ECX >> 11) & 1))
+ Features |= 1 << FEATURE_XOP;
+ if (HasExtLeaf1 && ((ECX >> 16) & 1))
+ Features |= 1 << FEATURE_FMA4;
+
+ if (HasExtLeaf1 && ((EDX >> 29) & 1))
+ Features2 |= 1 << (FEATURE_EM64T - 32);
+
+ *FeaturesOut = Features;
+ *Features2Out = Features2;
}
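
With feature enums now reaching past 31, a single 32-bit word no longer suffices; bits 0-31 live in Features and bits 32-63 in Features2, which is why call sites test (FEATURE_X - 32) against Features2. A hypothetical helper showing the convention (hasFeature is not part of the patch):

    static bool hasFeature(unsigned Features, unsigned Features2, unsigned F) {
      return F < 32 ? (Features >> F) & 1 : (Features2 >> (F - 32)) & 1;
    }
    // e.g. hasFeature(Features, Features2, FEATURE_ADX) is equivalent to
    // (Features2 & (1 << (FEATURE_ADX - 32))) != 0.
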
StringRef sys::getHostCPUName() {
@@ -1004,23 +1043,22 @@ StringRef sys::getHostCPUName() {
if(!isCpuIdSupported())
return "generic";
#endif
- if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX))
- return "generic";
- if (getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1)
return "generic";
+ getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
unsigned Brand_id = EBX & 0xff;
unsigned Family = 0, Model = 0;
- unsigned Features = 0;
+ unsigned Features = 0, Features2 = 0;
detectX86FamilyModel(EAX, &Family, &Model);
- Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
+ getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
unsigned Type;
unsigned Subtype;
if (Vendor == SIG_INTEL) {
- getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, &Type,
- &Subtype);
+ getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
+ Features2, &Type, &Subtype);
switch (Type) {
case INTEL_i386:
return "i386";
@@ -1049,7 +1087,7 @@ StringRef sys::getHostCPUName() {
case INTEL_CORE2_45:
return "penryn";
default:
- return "core2";
+ llvm_unreachable("Unexpected subtype!");
}
case INTEL_COREI7:
switch (Subtype) {
@@ -1070,21 +1108,16 @@ StringRef sys::getHostCPUName() {
case INTEL_COREI7_SKYLAKE_AVX512:
return "skylake-avx512";
default:
- return "corei7";
+ llvm_unreachable("Unexpected subtype!");
}
- case INTEL_ATOM:
- switch (Subtype) {
- case INTEL_ATOM_BONNELL:
- return "bonnell";
- case INTEL_ATOM_GOLDMONT:
- return "goldmont";
- case INTEL_ATOM_SILVERMONT:
- return "silvermont";
- default:
- return "atom";
- }
- case INTEL_XEONPHI:
- return "knl"; /*update for more variants added*/
+ case INTEL_BONNELL:
+ return "bonnell";
+ case INTEL_SILVERMONT:
+ return "silvermont";
+ case INTEL_GOLDMONT:
+ return "goldmont";
+ case INTEL_KNL:
+ return "knl";
case INTEL_X86_64:
return "x86-64";
case INTEL_NOCONA:
@@ -1092,7 +1125,7 @@ StringRef sys::getHostCPUName() {
case INTEL_PRESCOTT:
return "prescott";
default:
- return "generic";
+ break;
}
} else if (Vendor == SIG_AMD) {
getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype);
@@ -1114,31 +1147,24 @@ StringRef sys::getHostCPUName() {
}
case AMDATHLON:
switch (Subtype) {
- case AMDATHLON_TBIRD:
- return "athlon-tbird";
- case AMDATHLON_MP:
- return "athlon-mp";
+ case AMDATHLON_CLASSIC:
+ return "athlon";
case AMDATHLON_XP:
return "athlon-xp";
+ case AMDATHLON_K8:
+ return "k8";
case AMDATHLON_K8SSE3:
return "k8-sse3";
- case AMDATHLON_OPTERON:
- return "opteron";
- case AMDATHLON_FX:
- return "athlon-fx";
- case AMDATHLON_64:
- return "athlon64";
default:
- return "athlon";
+ llvm_unreachable("Unexpected subtype!");
}
case AMDFAM10H:
- if(Subtype == AMDFAM10H_BARCELONA)
- return "barcelona";
return "amdfam10";
- case AMDFAM14H:
+ case AMD_BTVER1:
return "btver1";
case AMDFAM15H:
switch (Subtype) {
+ default: // There are gaps in the subtype detection.
case AMDFAM15H_BDVER1:
return "bdver1";
case AMDFAM15H_BDVER2:
@@ -1147,31 +1173,13 @@ StringRef sys::getHostCPUName() {
return "bdver3";
case AMDFAM15H_BDVER4:
return "bdver4";
- case AMD_BTVER1:
- return "btver1";
- default:
- return "amdfam15";
- }
- case AMDFAM16H:
- switch (Subtype) {
- case AMD_BTVER1:
- return "btver1";
- case AMD_BTVER2:
- return "btver2";
- default:
- return "amdfam16";
}
+ case AMD_BTVER2:
+ return "btver2";
case AMDFAM17H:
- switch (Subtype) {
- case AMD_BTVER1:
- return "btver1";
- case AMDFAM17H_ZNVER1:
- return "znver1";
- default:
- return "amdfam17";
- }
+ return "znver1";
default:
- return "generic";
+ break;
}
}
return "generic";
@@ -1494,7 +1502,8 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
#endif
std::string sys::getProcessTriple() {
- Triple PT(Triple::normalize(LLVM_HOST_TRIPLE));
+ std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
+ Triple PT(Triple::normalize(TargetTripleString));
if (sizeof(void *) == 8 && PT.isArch32Bit())
PT = PT.get64BitArchVariant();
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index bdd02105f6f0e..b1d5e7c0d9912 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Config/config.h"
//===----------------------------------------------------------------------===//
@@ -47,6 +48,10 @@ MutexImpl::MutexImpl( bool recursive)
// Declare the pthread_mutex data structures
pthread_mutex_t* mutex =
static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+
+ if (mutex == nullptr)
+ report_bad_alloc_error("Mutex allocation failed");
+
pthread_mutexattr_t attr;
// Initialize the mutex attributes
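
The new null check routes allocation failure into LLVM's dedicated handler instead of letting the subsequent mutex initialization dereference a null pointer. The general pattern, sketched (Size is a placeholder):

    #include "llvm/Support/ErrorHandling.h"
    #include <cstdlib>

    void *Mem = std::malloc(Size);
    if (!Mem)
      llvm::report_bad_alloc_error("allocation failed"); // invokes the
                                                         // bad-alloc handler
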
diff --git a/lib/Support/Unix/DynamicLibrary.inc b/lib/Support/Unix/DynamicLibrary.inc
index aad77f19c35a5..f05103ccd1ebe 100644
--- a/lib/Support/Unix/DynamicLibrary.inc
+++ b/lib/Support/Unix/DynamicLibrary.inc
@@ -20,6 +20,9 @@ DynamicLibrary::HandleSet::~HandleSet() {
::dlclose(Handle);
if (Process)
::dlclose(Process);
+
+  // llvm_shutdown was called; reset the search order to the default.
+ DynamicLibrary::SearchOrder = DynamicLibrary::SO_Linker;
}
void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) {
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index 0ba6a25aa198d..5580e63893c6d 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -34,18 +34,31 @@ static std::string getOSVersion() {
return info.release;
}
-std::string sys::getDefaultTargetTriple() {
- std::string TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE);
-
- // On darwin, we want to update the version to match that of the
- // target.
+static std::string updateTripleOSVersion(std::string TargetTripleString) {
+ // On darwin, we want to update the version to match that of the target.
std::string::size_type DarwinDashIdx = TargetTripleString.find("-darwin");
if (DarwinDashIdx != std::string::npos) {
TargetTripleString.resize(DarwinDashIdx + strlen("-darwin"));
TargetTripleString += getOSVersion();
+ return TargetTripleString;
+ }
+ std::string::size_type MacOSDashIdx = TargetTripleString.find("-macos");
+ if (MacOSDashIdx != std::string::npos) {
+ TargetTripleString.resize(MacOSDashIdx);
+ // Reset the OS to darwin as the OS version from `uname` doesn't use the
+ // macOS version scheme.
+ TargetTripleString += "-darwin";
+ TargetTripleString += getOSVersion();
}
+ return TargetTripleString;
+}
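
The effect on darwin-flavored triples, with illustrative version numbers (getOSVersion() reports the kernel release from uname):

    // "x86_64-apple-darwin"     -> "x86_64-apple-darwin16.7.0"
    // "x86_64-apple-macos10.12" -> "x86_64-apple-darwin16.7.0"
    // The macOS version is dropped because uname speaks the darwin scheme.
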
+
+std::string sys::getDefaultTargetTriple() {
+ std::string TargetTripleString =
+ updateTripleOSVersion(LLVM_DEFAULT_TARGET_TRIPLE);
- // Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV.
+ // Override the default target with an environment variable named by
+ // LLVM_TARGET_TRIPLE_ENV.
#if defined(LLVM_TARGET_TRIPLE_ENV)
if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV))
TargetTripleString = EnvTriple;
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 1704fa4799428..c866d5b5a84ef 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -40,9 +40,6 @@
#include <unistd.h>
#endif
#ifdef HAVE_POSIX_SPAWN
-#ifdef __sun__
-#define _RESTRICT_KYWD
-#endif
#include <spawn.h>
#if defined(__APPLE__)
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index caf1a0a658de0..083ea902eeb29 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -28,6 +28,8 @@ DynamicLibrary::HandleSet::~HandleSet() {
// 'Process' should not be released on Windows.
assert((!Process || Process==this) && "Bad Handle");
+  // llvm_shutdown was called; reset the search order to the default.
+ DynamicLibrary::SearchOrder = DynamicLibrary::SO_Linker;
}
void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) {
diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc
index 7e196cf0ce18a..90a6fb316703f 100644
--- a/lib/Support/Windows/Host.inc
+++ b/lib/Support/Windows/Host.inc
@@ -17,6 +17,10 @@
using namespace llvm;
+static std::string updateTripleOSVersion(std::string Triple) {
+ return Triple;
+}
+
std::string sys::getDefaultTargetTriple() {
const char *Triple = LLVM_DEFAULT_TARGET_TRIPLE;
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 4a7e0b2b803ee..db1fbe069f4d2 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -509,7 +509,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
assert(ChainBegin != ChainEnd && "Chain should contain instructions");
do {
--I;
- Units.accumulateBackward(*I);
+ Units.accumulate(*I);
} while (I != ChainBegin);
// Make sure we allocate in-order, to get the cheapest registers first.
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 6f8dd3e3ac0ca..b3b738584b409 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -113,7 +113,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
return Copy;
}
- // Create a virtal register in *TLSBaseAddrReg, and populate it by
+ // Create a virtual register in *TLSBaseAddrReg, and populate it by
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *setRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
MachineFunction *MF = I.getParent()->getParent();
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp
index 0a948812ff337..51700f9059799 100644
--- a/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -167,6 +167,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
case AArch64::SUBWrs:
case AArch64::SUBWrx:
IsFlagSetting = false;
+ LLVM_FALLTHROUGH;
case AArch64::ADDSWri:
case AArch64::ADDSWrr:
case AArch64::ADDSWrs:
@@ -226,6 +227,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
case AArch64::SUBXrs:
case AArch64::SUBXrx:
IsFlagSetting = false;
+ LLVM_FALLTHROUGH;
case AArch64::ADDSXri:
case AArch64::ADDSXrr:
case AArch64::ADDSXrs:
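
Several hunks in this patch annotate deliberate case fallthroughs. LLVM_FALLTHROUGH (from llvm/Support/Compiler.h) expands to the compiler's fallthrough attribute where available, silencing -Wimplicit-fallthrough without changing behavior; a minimal sketch with hypothetical names:

    #include "llvm/Support/Compiler.h"

    switch (Kind) {
    case KindA:
      doSetup();        // KindA needs an extra setup step...
      LLVM_FALLTHROUGH; // ...and then shares KindB's body.
    case KindB:
      doWork();
      break;
    }
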
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 7bf2097c17cee..3682b62d2b84d 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2114,7 +2114,7 @@ bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type.");
- case MVT::i1: VTIsi1 = true;
+ case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
case MVT::i8: Opc = OpcTable[Idx][0]; break;
case MVT::i16: Opc = OpcTable[Idx][1]; break;
case MVT::i32: Opc = OpcTable[Idx][2]; break;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index aaf32a499bc3c..60fde5caa3393 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8364,9 +8364,9 @@ static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
/// EXTR instruction extracts a contiguous chunk of bits from two existing
/// registers viewed as a high/low pair. This function looks for the pattern:
-/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
-/// EXTR. Can't quite be done in TableGen because the two immediates aren't
-/// independent.
+/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
+/// with an EXTR. Can't quite be done in TableGen because the two immediates
+/// aren't independent.
static SDValue tryCombineToEXTR(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
@@ -9531,7 +9531,7 @@ static SDValue performPostLD1Combine(SDNode *N,
return SDValue();
}
-/// Simplify \Addr given that the top byte of it is ignored by HW during
+/// Simplify ``Addr`` given that the top byte of it is ignored by HW during
/// address translation.
static bool performTBISimplification(SDValue Addr,
TargetLowering::DAGCombinerInfo &DCI,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 314e89bbca863..dba3e4bdf82f1 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1282,6 +1282,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
case AArch64CC::HI: // Z clear and C set
case AArch64CC::LS: // Z set or C clear
UsedFlags.Z = true;
+ LLVM_FALLTHROUGH;
case AArch64CC::HS: // C set
case AArch64CC::LO: // C clear
UsedFlags.C = true;
@@ -1300,6 +1301,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
case AArch64CC::GT: // Z clear, N and V the same
case AArch64CC::LE: // Z set, N and V differ
UsedFlags.Z = true;
+ LLVM_FALLTHROUGH;
case AArch64CC::GE: // N and V the same
case AArch64CC::LT: // N and V differ
UsedFlags.N = true;
@@ -3669,12 +3671,17 @@ enum class FMAInstKind { Default, Indexed, Accumulator };
/// F|MUL I=A,B,0
/// F|ADD R,I,C
/// ==> F|MADD R,A,B,C
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
+/// \param RC Register class of operands
+/// \param kind The kind of FMA instruction (addressing mode) to generate
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
@@ -3733,6 +3740,9 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
/// ADD R,I,Imm
/// ==> ORR V, ZR, Imm
/// ==> MADD R,A,B,V
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
@@ -3741,6 +3751,7 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
+/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -4216,26 +4227,36 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
/// \brief Replace csincr-branch sequence by simple conditional branch
///
/// Examples:
-/// 1.
+/// 1. \code
/// csinc w9, wzr, wzr, <condition code>
/// tbnz w9, #0, 0x44
+/// \endcode
/// to
+/// \code
/// b.<inverted condition code>
+/// \endcode
///
-/// 2.
+/// 2. \code
/// csinc w9, wzr, wzr, <condition code>
/// tbz w9, #0, 0x44
+/// \endcode
/// to
+/// \code
/// b.<condition code>
+/// \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is power of 2.
///
/// Examples:
+/// \code
/// and w8, w8, #0x400
/// cbnz w8, L1
+/// \endcode
/// to
+/// \code
/// tbnz w8, #10, L1
+/// \endcode
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
@@ -4409,6 +4430,13 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
+ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
+AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
+ static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
+ {{MOSuppressPair, "aarch64-suppress-pair"}};
+ return makeArrayRef(TargetFlags);
+}
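
This hook gives target-private MachineMemOperand flags a stable string name so MIR can round-trip them. For context, MOSuppressPair is defined from one of the generic target-flag slots; a sketch of the declaration side, as I understand the existing AArch64InstrInfo.h:

    // Target flag slot reserved for "don't form load/store pairs".
    static const MachineMemOperand::Flags MOSuppressPair =
        MachineMemOperand::MOTargetFlag1;
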
+
unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
size_t Occurrences,
bool CanBeTailCall) const {
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 58e9ce583d44c..0809ede4df2a5 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -263,8 +263,8 @@ public:
/// \param Pattern - combiner pattern
bool isThroughputPattern(MachineCombinerPattern Pattern) const override;
/// Return true when there is potentially a faster code sequence
- /// for an instruction chain ending in <Root>. All potential patterns are
- /// listed in the <Patterns> array.
+ /// for an instruction chain ending in ``Root``. All potential patterns are
+ /// listed in the ``Patterns`` array.
bool getMachineCombinerPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns)
const override;
@@ -289,6 +289,8 @@ public:
getSerializableDirectMachineOperandTargetFlags() const override;
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
+ getSerializableMachineMemOperandTargetFlags() const override;
bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 6cb723d187af5..0be14673eb20b 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -313,9 +313,6 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
-
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
@@ -714,10 +711,10 @@ def : InstAlias<"negs $dst, $src$shift",
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;
-def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr $Rn, $Rm)>;
-def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr $Rn, $Rm)>;
-def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr $Rn, $Rm)>;
-def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr $Rn, $Rm)>;
+def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
+def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 07ce0e863c5e0..7e275e4d2f463 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -33,6 +33,8 @@
#define DEBUG_TYPE "aarch64-isel"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -212,6 +214,7 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
return GenericOpc;
}
}
+ break;
case AArch64::FPRRegBankID:
switch (OpSize) {
case 32:
@@ -243,7 +246,8 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
return GenericOpc;
}
}
- };
+ break;
+ }
return GenericOpc;
}
@@ -267,6 +271,7 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
case 64:
return isStore ? AArch64::STRXui : AArch64::LDRXui;
}
+ break;
case AArch64::FPRRegBankID:
switch (OpSize) {
case 8:
@@ -278,7 +283,8 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
case 64:
return isStore ? AArch64::STRDui : AArch64::LDRDui;
}
- };
+ break;
+ }
return GenericOpc;
}
@@ -1319,6 +1325,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
case TargetOpcode::G_VASTART:
return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
: selectVaStartAAPCS(I, MF, MRI);
+ case TargetOpcode::G_IMPLICIT_DEF:
+ I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
+ return true;
}
return false;
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 4b568f3fba2ba..4a0a7c36baf8b 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -291,11 +291,10 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
unsigned DstPtr;
if (Align > PtrSize) {
// Realign the list to the actual required alignment.
- unsigned AlignMinus1 = MRI.createGenericVirtualRegister(IntPtrTy);
- MIRBuilder.buildConstant(AlignMinus1, Align - 1);
+ auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildGEP(ListTmp, List, AlignMinus1);
+ MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg());
DstPtr = MRI.createGenericVirtualRegister(PtrTy);
MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index f3c8e7e9bdc2b..4e65c0ab6011b 100644
--- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -163,6 +163,7 @@ AArch64RedundantCopyElimination::knownRegValInBlock(
case AArch64::ADDSWri:
case AArch64::ADDSXri:
IsCMN = true;
+ LLVM_FALLTHROUGH;
// CMP is an alias for SUBS with a dead destination register.
case AArch64::SUBSWri:
case AArch64::SUBSXri: {
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index a9a9d5ce84297..a3238cf3b60f0 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -81,6 +81,7 @@ void AArch64Subtarget::initializeProperties() {
break;
case CortexA57:
MaxInterleaveFactor = 4;
+ PrefFunctionAlignment = 4;
break;
case ExynosM1:
MaxInterleaveFactor = 4;
@@ -130,7 +131,9 @@ void AArch64Subtarget::initializeProperties() {
break;
case CortexA35: break;
case CortexA53: break;
- case CortexA72: break;
+ case CortexA72:
+ PrefFunctionAlignment = 4;
+ break;
case CortexA73: break;
case Others: break;
}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 7933e58c49eed..db53946cbc77f 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -218,6 +218,13 @@ public:
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
+
+ /// \brief Return true if the CPU supports any kind of instruction fusion.
+ bool hasFusion() const {
+ return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
+ hasFuseAES() || hasFuseLiterals();
+ }
+
bool useRSqrt() const { return UseRSqrt; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 1252f9403812e..6237b8f3e7b9b 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -277,17 +277,19 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
+ const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createAArch64MacroFusionDAGMutation());
+ if (ST.hasFusion())
+ DAG->addMutation(createAArch64MacroFusionDAGMutation());
return DAG;
}
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
- if (ST.hasFuseAES() || ST.hasFuseLiterals()) {
+ if (ST.hasFusion()) {
// Run the Macro Fusion after RA again since literals are expanded from
// pseudos then (v. addPreSched2()).
ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 0d860a7eef794..7870dce5c9c0f 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -756,7 +756,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
// if shift == '11' then ReservedValue()
if (shiftHi == 0x3)
return Fail;
- // Deliberate fallthrough
+ LLVM_FALLTHROUGH;
case AArch64::ANDWrs:
case AArch64::ANDSWrs:
case AArch64::BICWrs:
@@ -780,7 +780,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
// if shift == '11' then ReservedValue()
if (shiftHi == 0x3)
return Fail;
- // Deliberate fallthrough
+ LLVM_FALLTHROUGH;
case AArch64::ANDXrs:
case AArch64::ANDSXrs:
case AArch64::BICXrs:
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 475f91016840a..a7a7daf4b4a55 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -73,7 +73,7 @@ public:
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsResolved) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -264,7 +264,7 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con
void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
- bool IsPCRel) const {
+ bool IsResolved) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
if (!Value)
return; // Doesn't change encoding.
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index fc808ee0cdd64..c25bd8c8f6cc9 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -103,4 +103,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() {
CommentString = ";";
+ PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
}
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 6f002860044c0..ed5370826647f 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -108,10 +108,11 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
DFS(Start, Checklist);
for (auto &BB : Checklist) {
BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
- BasicBlock::iterator(Load) : BB->end();
- if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr),
- true, StartIt, BB, Load).isClobber())
- return true;
+ BasicBlock::iterator(Load) : BB->end();
+ auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
+ StartIt, BB, Load);
+ if (Q.isClobber() || Q.isUnknown())
+ return true;
}
return false;
}
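
Dependence results the analysis cannot classify are now treated the same as clobbers, which is the conservative choice for this clobber scan; the rule in isolation:

    auto Dep = MDR->getPointerDependencyFrom(MemoryLocation(Ptr),
                                             /*isLoad=*/true, StartIt, BB, Load);
    if (Dep.isClobber() || Dep.isUnknown())
      return true; // an unknown dependence must be assumed to clobber
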
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index b312dbc8d14d6..31ee9206ae27b 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -380,7 +380,9 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
FastMathFlags FMF = FPOp->getFastMathFlags();
bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
FMF.allowReciprocal();
- if (ST->hasFP32Denormals() && !UnsafeDiv)
+
+  // With UnsafeDiv the node will be optimized to just rcp and mul.
+ if (ST->hasFP32Denormals() || UnsafeDiv)
return false;
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 96f819fd0e684..2553cf4da0feb 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2651,8 +2651,11 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
return DAG.getZExtOrTrunc(Shl, SL, VT);
}
- case ISD::OR: if (!isOrEquivalentToAdd(DAG, LHS)) break;
- case ISD::ADD: { // Fall through from above
+ case ISD::OR:
+ if (!isOrEquivalentToAdd(DAG, LHS))
+ break;
+ LLVM_FALLTHROUGH;
+ case ISD::ADD: {
// shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1)
if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) {
SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0),
diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 846e7dff5f8cc..7e0e9802c0e6d 100644
--- a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -10,6 +10,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -34,9 +35,14 @@ public:
AMDGPULowerIntrinsics() : ModulePass(ID) {}
bool runOnModule(Module &M) override;
+ bool expandMemIntrinsicUses(Function &F);
StringRef getPassName() const override {
return "AMDGPU Lower Intrinsics";
}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
};
}
@@ -55,7 +61,7 @@ static bool shouldExpandOperationWithSize(Value *Size) {
return !CI || (CI->getZExtValue() > MaxStaticSize);
}
-static bool expandMemIntrinsicUses(Function &F) {
+bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {
Intrinsic::ID ID = F.getIntrinsicID();
bool Changed = false;
@@ -67,7 +73,10 @@ static bool expandMemIntrinsicUses(Function &F) {
case Intrinsic::memcpy: {
auto *Memcpy = cast<MemCpyInst>(Inst);
if (shouldExpandOperationWithSize(Memcpy->getLength())) {
- expandMemCpyAsLoop(Memcpy);
+ Function *ParentFunc = Memcpy->getParent()->getParent();
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*ParentFunc);
+ expandMemCpyAsLoop(Memcpy, TTI);
Changed = true;
Memcpy->eraseFromParent();
}
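
Because expandMemCpyAsLoop now needs a TargetTransformInfo, the pass both declares and fetches the analysis; the legacy pass manager pattern used above, condensed:

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<TargetTransformInfoWrapperPass>();
    }
    // ... then, per function, inside the pass:
    const TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
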
diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
new file mode 100644
index 0000000000000..7263ba73d1550
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -0,0 +1,64 @@
+//===--- AMDGPUMacroFusion.cpp - AMDGPU Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the AMDGPU implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMacroFusion.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+
+#include "llvm/CodeGen/MacroFusion.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief Check if the instruction pair, FirstMI and SecondMI, should be
+/// fused together. When FirstMI is unspecified, check whether SecondMI may
+/// be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ const SIInstrInfo &TII = static_cast<const SIInstrInfo&>(TII_);
+
+ switch (SecondMI.getOpcode()) {
+ case AMDGPU::V_ADDC_U32_e64:
+ case AMDGPU::V_SUBB_U32_e64:
+ case AMDGPU::V_CNDMASK_B32_e64: {
+ // Try to cluster defs of condition registers to their uses. This improves
+ // the chance VCC will be available which will allow shrinking to VOP2
+ // encodings.
+ if (!FirstMI)
+ return true;
+
+ const MachineOperand *Src2 = TII.getNamedOperand(SecondMI,
+ AMDGPU::OpName::src2);
+ return FirstMI->definesRegister(Src2->getReg());
+ }
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+} // end namespace
+
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation() {
+ return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
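
As the header below notes, the mutation only takes effect once it is registered with a scheduler; the AMDGPUTargetMachine hunk later in this patch does exactly that:

    DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
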
diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.h b/lib/Target/AMDGPU/AMDGPUMacroFusion.h
new file mode 100644
index 0000000000000..844958580a65b
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.h
@@ -0,0 +1,19 @@
+//===- AMDGPUMacroFusion.h - AMDGPU Macro Fusion ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createAMDGPUMacroFusionDAGMutation());
+/// to AMDGPUPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation();
+
+} // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index be47b900c6f06..1bc5a52053ecb 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -13,6 +13,14 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "AMDGPUCallLowering.h"
+#include "AMDGPUInstructionSelector.h"
+#include "AMDGPULegalizerInfo.h"
+#include "AMDGPURegisterBankInfo.h"
+#endif
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
@@ -72,6 +80,31 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
return *this;
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+
+struct SIGISelActualAccessor : public GISelAccessor {
+ std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ const AMDGPUCallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+};
+
+} // end anonymous namespace
+#endif
+
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM)
: AMDGPUGenSubtargetInfo(TT, GPU, FS),
@@ -265,18 +298,21 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::r600_read_tidig_x:
IdQuery = true;
+ LLVM_FALLTHROUGH;
case Intrinsic::r600_read_local_size_x:
Dim = 0;
break;
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
IdQuery = true;
+ LLVM_FALLTHROUGH;
case Intrinsic::r600_read_local_size_y:
Dim = 1;
break;
case Intrinsic::amdgcn_workitem_id_z:
case Intrinsic::r600_read_tidig_z:
IdQuery = true;
+ LLVM_FALLTHROUGH;
case Intrinsic::r600_read_local_size_z:
Dim = 2;
break;
@@ -317,11 +353,23 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
TLInfo(TM, *this) {}
SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const TargetMachine &TM) :
- AMDGPUSubtarget(TT, GPU, FS, TM),
- InstrInfo(*this),
- FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
- TLInfo(TM, *this) {}
+ const TargetMachine &TM)
+ : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ TLInfo(TM, *this) {
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
+ GISel->CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
+ GISel->Legalizer.reset(new AMDGPULegalizerInfo());
+
+ GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
+ GISel->InstSelector.reset(new AMDGPUInstructionSelector(
+ *this, *static_cast<AMDGPURegisterBankInfo *>(GISel->RegBankInfo.get())));
+#endif
+ setGISelAccessor(*GISel);
+}
void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 425fd35d47de6..dc868f010d85c 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -19,9 +19,7 @@
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-#include "AMDGPURegisterBankInfo.h"
-#endif
+#include "AMDGPUMacroFusion.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
@@ -85,7 +83,7 @@ static cl::opt<bool> EnableLoadStoreVectorizer(
static cl::opt<bool> ScalarizeGlobal(
"amdgpu-scalarize-global-loads",
cl::desc("Enable global load scalarization"),
- cl::init(false),
+ cl::init(true),
cl::Hidden);
// Option to run internalize pass.
@@ -176,6 +174,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
return DAG;
}
@@ -389,31 +388,6 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl(
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
-
-struct SIGISelActualAccessor : public GISelAccessor {
- std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
- const AMDGPUCallLowering *getCallLowering() const override {
- return CallLoweringInfo.get();
- }
- const InstructionSelector *getInstructionSelector() const override {
- return InstSelector.get();
- }
- const LegalizerInfo *getLegalizerInfo() const override {
- return Legalizer.get();
- }
- const RegisterBankInfo *getRegBankInfo() const override {
- return RegBankInfo.get();
- }
-};
-
-} // end anonymous namespace
-#endif
-
GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
@@ -435,21 +409,6 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
-
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
- GISel->CallLoweringInfo.reset(
- new AMDGPUCallLowering(*I->getTargetLowering()));
- GISel->Legalizer.reset(new AMDGPULegalizerInfo());
-
- GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
- GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
- *static_cast<AMDGPURegisterBankInfo*>(GISel->RegBankInfo.get())));
-#endif
-
- I->setGISelAccessor(*GISel);
}
I->setScalarizeGlobalBehavior(ScalarizeGlobal);
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 7b8756050b752..e3c90f250600a 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1058,17 +1058,13 @@ public:
OperandMatchResultTy parseOModOperand(OperandVector &Operands);
- void cvtId(MCInst &Inst, const OperandVector &Operands);
- void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
-
- void cvtVOP3Impl(MCInst &Inst,
- const OperandVector &Operands,
- OptionalImmIndexMap &OptionalIdx);
+ void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
- void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
- void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
+ void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
+ bool IsAtomic = false);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
@@ -3870,13 +3866,19 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
// mimg
//===----------------------------------------------------------------------===//
-void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) {
+void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
+ bool IsAtomic) {
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
+ if (IsAtomic) {
+ // Add src, same as dst
+ ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
+ }
+
OptionalImmIndexMap OptionalIdx;
for (unsigned E = Operands.size(); I != E; ++I) {
@@ -3904,39 +3906,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) {
}
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
- unsigned I = 1;
- const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
- for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
- ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
- }
-
- // Add src, same as dst
- ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
-
- OptionalImmIndexMap OptionalIdx;
-
- for (unsigned E = Operands.size(); I != E; ++I) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
-
- // Add the register arguments
- if (Op.isRegOrImm()) {
- Op.addRegOrImmOperands(Inst, 1);
- continue;
- } else if (Op.isImmModifier()) {
- OptionalIdx[Op.getImmTy()] = I;
- } else {
- llvm_unreachable("unexpected operand type");
- }
- }
-
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+ cvtMIMG(Inst, Operands, true);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
@@ -4118,25 +4088,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands)
return MatchOperand_NoMatch;
}
-void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) {
- unsigned I = 1;
- const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
- for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
- ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
- }
- for (unsigned E = Operands.size(); I != E; ++I)
- ((AMDGPUOperand &)*Operands[I]).addRegOrImmOperands(Inst, 1);
-}
-
-void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands) {
- uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
- if (TSFlags & SIInstrFlags::VOP3) {
- cvtVOP3(Inst, Operands);
- } else {
- cvtId(Inst, Operands);
- }
-}
-
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
// 1. This operand is input modifiers
return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
@@ -4148,91 +4099,78 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
&& Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
-void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands,
- OptionalImmIndexMap &OptionalIdx) {
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx) {
+ unsigned Opc = Inst.getOpcode();
+
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
- for (unsigned E = Operands.size(); I != E; ++I) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
- if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
- Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
- } else if (Op.isImmModifier()) {
- OptionalIdx[Op.getImmTy()] = I;
- } else if (Op.isRegOrImm()) {
- Op.addRegOrImmOperands(Inst, 1);
- } else {
- llvm_unreachable("unhandled operand type");
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
+ // This instruction has src modifiers
+ for (unsigned E = Operands.size(); I != E; ++I) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+ } else if (Op.isImmModifier()) {
+ OptionalIdx[Op.getImmTy()] = I;
+ } else if (Op.isRegOrImm()) {
+ Op.addRegOrImmOperands(Inst, 1);
+ } else {
+ llvm_unreachable("unhandled operand type");
+ }
+ }
+ } else {
+ // No src modifiers
+ for (unsigned E = Operands.size(); I != E; ++I) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ if (Op.isMod()) {
+ OptionalIdx[Op.getImmTy()] = I;
+ } else {
+ Op.addRegOrImmOperands(Inst, 1);
+ }
}
}
-}
-void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
- OptionalImmIndexMap OptionalIdx;
-
- cvtVOP3Impl(Inst, Operands, OptionalIdx);
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+ }
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+ }
// special case v_mac_{f16, f32}:
// it has src2 register operand that is tied to dst operand
// we don't allow modifiers for this operand in assembler so src2_modifiers
// should be 0
- if (Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
- Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
- Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi) {
+ if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
+ Opc == AMDGPU::V_MAC_F16_e64_vi) {
auto it = Inst.begin();
- std::advance(
- it,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ?
- AMDGPU::V_MAC_F16_e64 :
- AMDGPU::V_MAC_F32_e64,
- AMDGPU::OpName::src2_modifiers));
+ std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
++it;
Inst.insert(it, Inst.getOperand(0)); // src2 = dst
}
}
-void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) {
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
-
- unsigned I = 1;
- const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
- for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
- ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
- }
-
- for (unsigned E = Operands.size(); I != E; ++I) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
- if (Op.isMod()) {
- OptionalIdx[Op.getImmTy()] = I;
- } else {
- Op.addRegOrImmOperands(Inst, 1);
- }
- }
-
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+ cvtVOP3(Inst, Operands, OptionalIdx);
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptIdx;
- cvtVOP3Impl(Inst, Operands, OptIdx);
+ cvtVOP3(Inst, Operands, OptIdx);
// FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
// instruction, and then figure out where to actually put the modifiers
int Opc = Inst.getOpcode();
- if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
- addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
- }
-
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
@@ -4284,7 +4222,7 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
- Inst.getOperand(ModIdx).setImm(ModVal);
+ Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
}
}
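
The hunks above fold cvtVOP3_2_mod, cvtVOP3OMod and the old two-argument cvtVOP3 into one converter that keys off which named operands the opcode actually has. A minimal standalone sketch of that pattern, using illustrative stand-in types rather than the LLVM classes: parsed operands are appended in order, optional immediate modifiers are remembered by kind, and the optional operands are then appended in the instruction's canonical order, defaulting to 0 when the user never wrote them.

#include <cstdint>
#include <map>
#include <vector>

enum class ImmTy { Clamp, OMod };

struct ParsedOp {
  bool IsModifier;   // true for clamp/omod-style tokens
  ImmTy Ty;          // valid only when IsModifier is set
  int64_t Value;     // register number or immediate
};

std::vector<int64_t> convert(const std::vector<ParsedOp> &Ops,
                             bool HasClamp, bool HasOMod) {
  std::vector<int64_t> Inst;
  std::map<ImmTy, size_t> OptionalIdx;
  for (size_t I = 0; I != Ops.size(); ++I) {
    if (Ops[I].IsModifier)
      OptionalIdx[Ops[I].Ty] = I;     // defer to its canonical position
    else
      Inst.push_back(Ops[I].Value);   // regs/imms go straight through
  }
  // Append optional operands only if the opcode has them, mirroring the
  // getNamedOperandIdx(Opc, ...) != -1 checks in the converter above.
  auto AddOptional = [&](ImmTy Ty) {
    auto It = OptionalIdx.find(Ty);
    Inst.push_back(It == OptionalIdx.end() ? 0 : Ops[It->second].Value);
  };
  if (HasClamp)
    AddOptional(ImmTy::Clamp);
  if (HasOMod)
    AddOptional(ImmTy::OMod);
  return Inst;
}
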
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index 917d9cfa69054..971208c5db847 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -47,6 +47,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUIntrinsicInfo.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPULowerIntrinsics.cpp
+ AMDGPUMacroFusion.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineCFGStructurizer.cpp
AMDGPUMachineFunction.cpp
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 8ead480673363..2e7641cda3755 100644
--- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -17,7 +17,7 @@
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
namespace llvm {
std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
diff --git a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index d378df674be9b..0657f67b217de 100644
--- a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
namespace {
class GCNMinRegScheduler {
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
index 390a8286c76a8..1d02c7fdffbf5 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8ec46665daf56..155b400ba022b 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/MathExtras.h"
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h
index 3ed3cd5b3b1ce..060d2ca72d93d 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -66,7 +66,7 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive {
const SIMachineFunctionInfo &MFI;
- // Occupancy target at the begining of function scheduling cycle.
+ // Occupancy target at the beginning of the function scheduling cycle.
unsigned StartingOccupancy;
// Minimal real occupancy recorded for the function.
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 2b408ff10caae..a50e3eb8d9cee 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -32,7 +32,7 @@ public:
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsResolved) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override {
@@ -100,7 +100,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
- bool IsPCRel) const {
+ bool IsResolved) const {
Value = adjustFixupValue(Fixup, Value, &Asm.getContext());
if (!Value)
return; // Doesn't change encoding.
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index a515eecc222af..06e2c11b01935 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -26,6 +26,7 @@ class MIMG_Helper <dag outs, dag ins, string asm,
let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
let AsmMatchConverter = "cvtMIMG";
let usesCustomInserter = 1;
+ let SchedRW = [WriteVMEM];
}
class MIMG_NoSampler_Helper <bits<7> op, string asm,
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 6993e8a62a9c2..00cbd24b84fbc 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -555,7 +555,7 @@ public:
CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
} else
CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
-
+ LLVM_FALLTHROUGH;
case AMDGPU::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 215791f4f92dd..69a63b6941ef2 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1618,7 +1618,8 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
return VT.changeVectorElementTypeToInteger();
}
-bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const {
+bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
+ const SelectionDAG &DAG) const {
// Local and Private addresses do not handle vectors. Limit to i32
if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
return (MemVT.getSizeInBits() <= 32);
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index d6a0876a6ee7d..2a774693f02b3 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -44,7 +44,8 @@ public:
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const override;
- bool canMergeStoresTo(unsigned AS, EVT MemVT) const override;
+ bool canMergeStoresTo(unsigned AS, EVT MemVT,
+ const SelectionDAG &DAG) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp
index 47fda1c8fa827..a7e540f9d14d3 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -22,7 +22,7 @@
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index f391f67a241f1..3af242d9ea660 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -137,6 +137,7 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
= TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
}
+ return false;
}
default:
return false;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index d39b345bdf032..2ba570b9ebbbc 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -547,7 +547,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = 0;
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
- Info.vol = !Vol || !Vol->isNullValue();
+ Info.vol = !Vol || !Vol->isZero();
Info.readMem = true;
Info.writeMem = true;
return true;
@@ -713,7 +713,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
}
-bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const {
+bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
+ const SelectionDAG &DAG) const {
if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) {
return (MemVT.getSizeInBits() <= 4 * 32);
} else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
@@ -2374,20 +2375,16 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IID) {
- case Intrinsic::amdgcn_cvt_pkrtz: {
+ if (IID == Intrinsic::amdgcn_cvt_pkrtz) {
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
SDLoc SL(N);
SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32,
Src0, Src1);
-
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
return;
}
- default:
- break;
- }
+ break;
}
case ISD::SELECT: {
SDLoc SL(N);
@@ -3736,7 +3733,9 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT VT = Op.getValueType();
- bool Unsafe = DAG.getTarget().Options.UnsafeFPMath;
+ const SDNodeFlags Flags = Op->getFlags();
+ bool Unsafe = DAG.getTarget().Options.UnsafeFPMath ||
+ Flags.hasUnsafeAlgebra() || Flags.hasAllowReciprocal();
if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals())
return SDValue();
@@ -3771,15 +3770,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
}
}
- const SDNodeFlags Flags = Op->getFlags();
-
- if (Unsafe || Flags.hasAllowReciprocal()) {
+ if (Unsafe) {
// Turn into multiply by the reciprocal.
// x / y -> x * (1.0 / y)
- SDNodeFlags NewFlags;
- NewFlags.setUnsafeAlgebra(true);
SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
- return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, NewFlags);
+ return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags);
}
return SDValue();
@@ -4622,15 +4617,99 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
return SDValue();
}
+static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
+ if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
+ return true;
+
+ return DAG.isKnownNeverNaN(Op);
+}
+
+static bool isCanonicalized(SDValue Op, const SISubtarget *ST,
+ unsigned MaxDepth=5) {
+ // If source is a result of another standard FP operation it is already in
+ // canonical form.
+
+ switch (Op.getOpcode()) {
+ default:
+ break;
+
+ // These will flush denorms if required.
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FSQRT:
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ case ISD::FMA:
+ case ISD::FMAD:
+
+ case ISD::FCANONICALIZE:
+ return true;
+
+ case ISD::FP_ROUND:
+ return Op.getValueType().getScalarType() != MVT::f16 ||
+ ST->hasFP16Denormals();
+
+ case ISD::FP_EXTEND:
+ return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
+ ST->hasFP16Denormals();
+
+ case ISD::FP16_TO_FP:
+ case ISD::FP_TO_FP16:
+ return ST->hasFP16Denormals();
+
+ // It can/will be lowered or combined as a bit operation.
+ // Need to check their input recursively to handle.
+ case ISD::FNEG:
+ case ISD::FABS:
+ return (MaxDepth > 0) &&
+ isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1);
+
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FSINCOS:
+ return Op.getValueType().getScalarType() != MVT::f16;
+
+ // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms.
+// For such targets we need to check their inputs recursively.
+// TODO: on GFX9+ we could return true without checking, provided no-nan
+// mode is requested, since canonicalization is also used to quiet sNaNs.
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+
+ return (MaxDepth > 0) &&
+ isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1) &&
+ isCanonicalized(Op.getOperand(1), ST, MaxDepth - 1);
+
+ case ISD::ConstantFP: {
+ auto F = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ return !F.isDenormal() && !(F.isNaN() && F.isSignaling());
+ }
+ }
+ return false;
+}
+
// Constant fold canonicalize.
SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
- if (!CFP)
+
+ if (!CFP) {
+ SDValue N0 = N->getOperand(0);
+
+ bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+
+ if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) &&
+ isCanonicalized(N0, getSubtarget()))
+ return N0;
+
return SDValue();
+ }
- SelectionDAG &DAG = DCI.DAG;
const APFloat &C = CFP->getValueAPF();
// Flush denormals to 0 if not enabled.
@@ -4723,13 +4802,6 @@ SDValue SITargetLowering::performIntMed3ImmCombine(
return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3);
}
-static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
- if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
- return true;
-
- return DAG.isKnownNeverNaN(Op);
-}
-
SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Op0,
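
The combine above now drops an explicit canonicalize when its input is provably canonical already, either because IEEE mode will quiet sNaNs anyway or because the producing node flushes denormals itself. A hedged standalone model of the recursion in isCanonicalized (node kinds, names and the depth limit are illustrative, not the SelectionDAG API):

#include <vector>

enum class Kind { Add, Mul, Neg, Abs, MinNum, ConstDenormal, ConstQuietNaN };

struct Node {
  Kind K;
  std::vector<const Node *> Ops;
};

bool isCanonical(const Node *N, unsigned MaxDepth = 5) {
  switch (N->K) {
  case Kind::Add:
  case Kind::Mul:
    return true;                 // standard FP ops flush/quiet as required
  case Kind::Neg:
  case Kind::Abs:                // lowered as bit ops: look through them
    return MaxDepth > 0 && isCanonical(N->Ops[0], MaxDepth - 1);
  case Kind::MinNum:             // pre-GFX9 min/max don't flush denorms
    return MaxDepth > 0 && isCanonical(N->Ops[0], MaxDepth - 1) &&
           isCanonical(N->Ops[1], MaxDepth - 1);
  case Kind::ConstQuietNaN:
    return true;                 // quiet, non-denormal constants are fine
  case Kind::ConstDenormal:
    return false;                // may still need flushing
  }
  return false;
}
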
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 24f88e632d38e..83392a7ab1b21 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -153,7 +153,8 @@ public:
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
- bool canMergeStoresTo(unsigned AS, EVT MemVT) const override;
+ bool canMergeStoresTo(unsigned AS, EVT MemVT,
+ const SelectionDAG &DAG) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index b6784ec14e9f8..160f8837d49c8 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2022,10 +2022,12 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
return nullptr;
case AMDGPU::V_MAC_F16_e64:
IsF16 = true;
+ LLVM_FALLTHROUGH;
case AMDGPU::V_MAC_F32_e64:
break;
case AMDGPU::V_MAC_F16_e32:
IsF16 = true;
+ LLVM_FALLTHROUGH;
case AMDGPU::V_MAC_F32_e32: {
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::src0);
@@ -4320,6 +4322,24 @@ SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const
return new GCNHazardRecognizer(MF);
}
+std::pair<unsigned, unsigned>
+SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ { MO_GOTPCREL, "amdgpu-gotprel" },
+ { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
+ { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
+ { MO_REL32_LO, "amdgpu-rel32-lo" },
+ { MO_REL32_HI, "amdgpu-rel32-hi" }
+ };
+
+ return makeArrayRef(TargetFlags);
+}
+
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI);
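
decomposeMachineOperandsTargetFlags splits the flag word into a direct value held in the low MO_MASK bits and a bitmask in the remaining bits, which is what lets the MIR serializer round-trip the names from getSerializableDirectMachineOperandTargetFlags. A tiny sketch of that contract with an arbitrary example value:

#include <cassert>
#include <utility>

constexpr unsigned MO_MASK = 0x7;

std::pair<unsigned, unsigned> decompose(unsigned TF) {
  return {TF & MO_MASK, TF & ~MO_MASK};
}

int main() {
  unsigned TF = 0x1D;                 // arbitrary example flags
  auto [Direct, Mask] = decompose(TF);
  assert(Direct <= MO_MASK && (Direct | Mask) == TF);
  return 0;
}
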
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 74b48c7618087..d00c0d4a7f4ea 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -100,6 +100,8 @@ protected:
public:
enum TargetOperandFlags {
+ MO_MASK = 0x7,
+
MO_NONE = 0,
// MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
MO_GOTPCREL = 1,
@@ -781,9 +783,15 @@ public:
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
MachineBasicBlock *LoopEnd) const;
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
ArrayRef<std::pair<int, const char *>>
getSerializableTargetIndices() const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 4a81fb3b463a9..ffb01363e1313 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1502,6 +1502,8 @@ def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
+def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
+
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index bb17dbbdfbd62..34886c48f461d 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -38,7 +38,7 @@
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
// This scheduler implements a different scheduling algorithm than
// GenericScheduler.
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 96a18544f02ac..874fbadca7f35 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -110,10 +110,8 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
}
const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- const MachineOperand *Src1Mod =
- TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
-
- if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
+ if (Src1 && (!isVGPR(Src1, TRI, MRI) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
return false;
// We don't need to check src0, all input types are legal, so just make sure
@@ -122,58 +120,64 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
return false;
// Check output modifiers
- if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
- return false;
-
- return !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
+ return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+ !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
}
/// \brief This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
-/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
-/// and will only fold literal constants if we are still in SSA.
-static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
+/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
+static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
MachineRegisterInfo &MRI, bool TryToCommute = true) {
-
- if (!MRI.isSSA())
- return;
-
assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
- // Only one literal constant is allowed per instruction, so if src0 is a
- // literal constant then we can't do any folding.
- if (TII->isLiteralConstant(MI, Src0Idx))
- return;
-
// Try to fold Src0
MachineOperand &Src0 = MI.getOperand(Src0Idx);
- if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
+ if (Src0.isReg()) {
unsigned Reg = Src0.getReg();
- MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
- if (Def && Def->isMoveImmediate()) {
- MachineOperand &MovSrc = Def->getOperand(1);
- bool ConstantFolded = false;
-
- if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
- isUInt<32>(MovSrc.getImm()))) {
- Src0.ChangeToImmediate(MovSrc.getImm());
- ConstantFolded = true;
- }
- if (ConstantFolded) {
- if (MRI.use_empty(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
+ MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+ if (Def && Def->isMoveImmediate()) {
+ MachineOperand &MovSrc = Def->getOperand(1);
+ bool ConstantFolded = false;
+
+ if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
+ isUInt<32>(MovSrc.getImm()))) {
+ // It's possible to have only one component of a super-reg defined by
+ // a single mov, so we need to clear any subregister flag.
+ Src0.setSubReg(0);
+ Src0.ChangeToImmediate(MovSrc.getImm());
+ ConstantFolded = true;
+ } else if (MovSrc.isFI()) {
+ Src0.setSubReg(0);
+ Src0.ChangeToFrameIndex(MovSrc.getIndex());
+ ConstantFolded = true;
+ }
+
+ if (ConstantFolded) {
+ assert(MRI.use_empty(Reg));
Def->eraseFromParent();
- ++NumLiteralConstantsFolded;
- return;
+ ++NumLiteralConstantsFolded;
+ return true;
+ }
}
}
}
// We have failed to fold src0, so commute the instruction and try again.
- if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(MI))
- foldImmediates(MI, TII, MRI, false);
+ if (TryToCommute && MI.isCommutable()) {
+ if (TII->commuteInstruction(MI)) {
+ if (foldImmediates(MI, TII, MRI, false))
+ return true;
+ // Commute back.
+ TII->commuteInstruction(MI);
+ }
+ }
+
+ return false;
}
// Copy MachineOperand with all flags except setting it as implicit.
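
foldImmediates now reports success, and when folding into src0 fails it commutes the instruction, retries once, and commutes back, so a failed attempt leaves the instruction exactly as it was. A self-contained model of that shape (Instr and its helpers are stand-ins for the MachineInstr machinery, not LLVM APIs):

#include <utility>

struct Instr {
  bool Src0IsImm = false, Src1IsImm = false, Commutable = false;
  bool Folded = false;
};

static bool tryFoldSrc0(Instr &MI) {
  if (!MI.Src0IsImm)
    return false;
  MI.Folded = true;        // stand-in for Src0.ChangeToImmediate(...)
  return true;
}

static bool commute(Instr &MI) {
  if (!MI.Commutable)
    return false;
  std::swap(MI.Src0IsImm, MI.Src1IsImm);
  return true;
}

bool foldImmediates(Instr &MI, bool TryToCommute = true) {
  if (tryFoldSrc0(MI))
    return true;
  if (TryToCommute && commute(MI)) {
    if (foldImmediates(MI, /*TryToCommute=*/false))
      return true;
    commute(MI);           // commute back: restore the original order
  }
  return false;
}
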
diff --git a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
index 9908fc003ce70..92fb762ebd731 100644
--- a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
+++ b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
-/// \brief The target which suports all AMD GPUs. This will eventually
+/// \brief The target which supports all AMD GPUs. This will eventually
/// be deprecated and there will be a R600 target and a GCN target.
Target &llvm::getTheAMDGPUTarget() {
static Target TheAMDGPUTarget;
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
index 96d343099132c..f2de1f9957260 100644
--- a/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -16,12 +16,21 @@ class VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag>
!if(P.HasModifiers, getVOP3PModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
>;
-// Non-packed instructions that use the VOP3P encoding. i.e. where
-// omod/abs are used.
+// Non-packed instructions that use the VOP3P encoding.
+// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
class VOP3_VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
- VOP3P_Pseudo<OpName, P,
- !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
->;
+ VOP3P_Pseudo<OpName, P> {
+ let InOperandList =
+ (ins
+ FP32InputMods:$src0_modifiers, VCSrc_f32:$src0,
+ FP32InputMods:$src1_modifiers, VCSrc_f32:$src1,
+ FP32InputMods:$src2_modifiers, VCSrc_f32:$src2,
+ clampmod:$clamp,
+ op_sel:$op_sel,
+ op_sel_hi:$op_sel_hi);
+ let AsmOperands =
+ " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp";
+}
let isCommutable = 1 in {
def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
@@ -46,9 +55,12 @@ def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I1
def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
// XXX - Commutable?
-def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
-def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+// These are VOP3a-like opcodes which accept no omod.
+// Size of src arguments (16/32) is controlled by op_sel.
+// For 16-bit src arguments their location (hi/lo) is controlled by op_sel_hi.
+def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_V2F16_V2F16_V2F16>>;
+def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
+def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
multiclass VOP3P_Real_vi<bits<10> op> {
diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td
index e386f21c2ba49..77b7952b22a88 100644
--- a/lib/Target/AMDGPU/VOPInstructions.td
+++ b/lib/Target/AMDGPU/VOPInstructions.td
@@ -51,12 +51,8 @@ class VOP3Common <dag outs, dag ins, string asm = "",
let VOP3 = 1;
- let AsmMatchConverter =
- !if(!eq(VOP3Only,1),
- "cvtVOP3",
- !if(!eq(HasMods,1), "cvtVOP3_2_mod", ""));
-
let AsmVariantName = AMDGPUAsmVariants.VOP3;
+ let AsmMatchConverter = !if(!eq(HasMods,1), "cvtVOP3", "");
let isCodeGenOnly = 0;
@@ -106,13 +102,11 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let AsmVariantName = AMDGPUAsmVariants.VOP3;
let AsmMatchConverter =
- !if(!eq(VOP3Only,1),
- !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
- !if(!eq(P.HasModifiers, 1),
- "cvtVOP3_2_mod",
- !if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "")
- )
- );
+ !if(!and(P.IsPacked, isVOP3P),
+ "cvtVOP3P",
+ !if(!or(P.HasModifiers, P.HasOMod),
+ "cvtVOP3",
+ ""));
VOPProfile Pfl = P;
}
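
Rendered as plain C++ for readability, the nested !if above picks the converter like this (field names are illustrative; the real decision lives in TableGen):

#include <string>

std::string pickConverter(bool IsPacked, bool IsVOP3P, bool HasModifiers,
                          bool HasOMod) {
  if (IsPacked && IsVOP3P)
    return "cvtVOP3P";
  if (HasModifiers || HasOMod)
    return "cvtVOP3";
  return "";               // no custom converter needed
}
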
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 90f635c812542..582153daebde9 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1103,6 +1103,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
case ARM::tPUSH:
// Special case here: no src & dst reg, but two extra imp ops.
StartOp = 2; NumOffset = 2;
+ LLVM_FALLTHROUGH;
case ARM::STMDB_UPD:
case ARM::t2STMDB_UPD:
case ARM::VSTMDDB_UPD:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1ec6b24b2ed67..3cf5950a1918d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1880,6 +1880,9 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
// Diamond: TBB is the block that is branched to, FBB is the fallthrough
TUnpredCycles = TCycles + TakenBranchCost;
FUnpredCycles = FCycles + NotTakenBranchCost;
+ // The branch at the end of FBB will disappear when it's predicated, so
+ // discount it from PredCost.
+ PredCost -= 1 * ScalingUpFactor;
}
// The total cost is the cost of each path scaled by their probabilities
unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
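
Once both sides of the diamond are predicated, the branch that used to terminate FBB disappears, so one branch cycle is subtracted from the predicated cost before it is weighed against the probability-scaled unpredicated paths. A hedged numeric sketch with made-up cycle counts and a 50/50 branch probability (not the exact ARMBaseInstrInfo arithmetic):

#include <cstdio>

int main() {
  const unsigned Scale = 1024;          // stand-in for ScalingUpFactor
  const unsigned TCycles = 2, FCycles = 2;
  const unsigned TakenBranchCost = 1, NotTakenBranchCost = 1;

  unsigned PredCost = (TCycles + FCycles) * Scale;
  PredCost -= 1 * Scale;                // FBB's terminating branch is gone

  // Each unpredicated path is weighted by its probability (50% here).
  unsigned TUnpred = (TCycles + TakenBranchCost) * Scale / 2;
  unsigned FUnpred = (FCycles + NotTakenBranchCost) * Scale / 2;

  std::printf("predicated=%u unpredicated=%u profitable=%d\n",
              PredCost, TUnpred + FUnpred, PredCost <= TUnpred + FUnpred);
  return 0;
}
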
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b4fb292c0116d..e97a7ce5067f9 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -193,10 +193,11 @@ getReservedRegs(const MachineFunction &MF) const {
for (unsigned R = 0; R < 16; ++R)
markSuperRegs(Reserved, ARM::D16 + R);
}
- const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
- for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
- for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
- if (Reserved.test(*SI)) markSuperRegs(Reserved, *I);
+ const TargetRegisterClass &RC = ARM::GPRPairRegClass;
+ for (unsigned Reg : RC)
+ for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI)
+ if (Reserved.test(*SI))
+ markSuperRegs(Reserved, Reg);
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
@@ -315,8 +316,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
Hints.push_back(PairedPhys);
// Then prefer even or odd registers.
- for (unsigned I = 0, E = Order.size(); I != E; ++I) {
- unsigned Reg = Order[I];
+ for (unsigned Reg : Order) {
if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
continue;
// Don't provide hints that are paired to a reserved register.
@@ -659,11 +659,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned Ba
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
unsigned i = 0;
-
- while (!MI->getOperand(i).isFI()) {
- ++i;
- assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
- }
+ for (; !MI->getOperand(i).isFI(); ++i)
+ assert(i+1 < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
// AddrMode4 and AddrMode6 cannot handle any offset.
if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index e498f70b820db..051827a6a6a2f 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -321,7 +321,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
- // The necesary extensions are handled on the other side of the ABI
+ // The necessary extensions are handled on the other side of the ABI
// boundary.
markPhysRegUsed(PhysReg);
MIRBuilder.buildCopy(ValVReg, PhysReg);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e42514acd76f0..6ba7593543a92 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3398,9 +3398,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDLoc dl(Op);
- ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2));
- auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue());
- if (Scope == SynchronizationScope::SingleThread)
+ ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
+ auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
+ if (SSID == SyncScope::SingleThread)
return Op;
if (!Subtarget->hasDataBarrier()) {
@@ -5356,15 +5356,15 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
// Integer comparisons.
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal integer comparison");
- case ISD::SETNE: Invert = true;
+ case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
- case ISD::SETLT: Swap = true;
+ case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = ARMISD::VCGT; break;
- case ISD::SETLE: Swap = true;
+ case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGE: Opc = ARMISD::VCGE; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
- case ISD::SETULE: Swap = true;
+ case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
}
@@ -13779,7 +13779,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
+ SubVec = Builder.CreateIntToPtr(
+ SubVec, VectorType::get(SV->getType()->getVectorElementType(),
+ VecTy->getVectorNumElements()));
SubVecs[SV].push_back(SubVec);
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5044134f5b1e2..f05b142552369 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -510,7 +510,8 @@ class InstrItineraryData;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
- bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const override {
+ bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+ const SelectionDAG &DAG) const override {
// Do not merge to larger than i32.
return (MemVT.getSizeInBits() <= 32);
}
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 53db5acbe805c..42eac12e457b2 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -4799,7 +4799,7 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm",
// Pseudo instruction ldr Rt, =immediate
def t2LDRConstPool
: t2AsmPseudo<"ldr${p} $Rt, $immediate",
- (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
+ (ins GPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
// Version w/ the .w suffix.
def : t2InstAlias<"ldr${p}.w $Rt, $immediate",
(t2LDRConstPool GPRnopc:$Rt,
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 374176d1d7371..29ef69ad0010f 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -20,6 +20,8 @@
#define DEBUG_TYPE "arm-isel"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -42,13 +44,32 @@ public:
private:
bool selectImpl(MachineInstr &I) const;
- bool selectICmp(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const;
+ struct CmpConstants;
+ struct InsertInfo;
+
+ bool selectCmp(CmpConstants Helper, MachineInstrBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+
+ // Helper for inserting a comparison sequence that sets \p ResReg to either 1
+ // if \p LHSReg and \p RHSReg are in the relationship defined by \p Cond, or
+ // \p PrevRes otherwise. In essence, it computes PrevRes OR (LHS Cond RHS).
+ bool insertComparison(CmpConstants Helper, InsertInfo I, unsigned ResReg,
+ ARMCC::CondCodes Cond, unsigned LHSReg, unsigned RHSReg,
+ unsigned PrevRes) const;
+
+ // Set \p DestReg to \p Constant.
+ void putConstant(InsertInfo I, unsigned DestReg, unsigned Constant) const;
+
+ bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const;
+
+ // Check if the types match and both operands have the expected size and
+ // register bank.
+ bool validOpRegPair(MachineRegisterInfo &MRI, unsigned LHS, unsigned RHS,
+ unsigned ExpectedSize, unsigned ExpectedRegBankID) const;
- bool selectSelect(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const;
+ // Check if the register has the expected size and register bank.
+ bool validReg(MachineRegisterInfo &MRI, unsigned Reg, unsigned ExpectedSize,
+ unsigned ExpectedRegBankID) const;
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
@@ -251,120 +272,233 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
return Opc;
}
-static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+// When lowering comparisons, we sometimes need to perform two compares instead
+// of just one. Get the condition codes for both comparisons. If only one is
+// needed, the second member of the pair is ARMCC::AL.
+static std::pair<ARMCC::CondCodes, ARMCC::CondCodes>
+getComparePreds(CmpInst::Predicate Pred) {
+ std::pair<ARMCC::CondCodes, ARMCC::CondCodes> Preds = {ARMCC::AL, ARMCC::AL};
switch (Pred) {
- // Needs two compares...
case CmpInst::FCMP_ONE:
+ Preds = {ARMCC::GT, ARMCC::MI};
+ break;
case CmpInst::FCMP_UEQ:
- default:
- // AL is our "false" for now. The other two need more compares.
- return ARMCC::AL;
+ Preds = {ARMCC::EQ, ARMCC::VS};
+ break;
case CmpInst::ICMP_EQ:
case CmpInst::FCMP_OEQ:
- return ARMCC::EQ;
+ Preds.first = ARMCC::EQ;
+ break;
case CmpInst::ICMP_SGT:
case CmpInst::FCMP_OGT:
- return ARMCC::GT;
+ Preds.first = ARMCC::GT;
+ break;
case CmpInst::ICMP_SGE:
case CmpInst::FCMP_OGE:
- return ARMCC::GE;
+ Preds.first = ARMCC::GE;
+ break;
case CmpInst::ICMP_UGT:
case CmpInst::FCMP_UGT:
- return ARMCC::HI;
+ Preds.first = ARMCC::HI;
+ break;
case CmpInst::FCMP_OLT:
- return ARMCC::MI;
+ Preds.first = ARMCC::MI;
+ break;
case CmpInst::ICMP_ULE:
case CmpInst::FCMP_OLE:
- return ARMCC::LS;
+ Preds.first = ARMCC::LS;
+ break;
case CmpInst::FCMP_ORD:
- return ARMCC::VC;
+ Preds.first = ARMCC::VC;
+ break;
case CmpInst::FCMP_UNO:
- return ARMCC::VS;
+ Preds.first = ARMCC::VS;
+ break;
case CmpInst::FCMP_UGE:
- return ARMCC::PL;
+ Preds.first = ARMCC::PL;
+ break;
case CmpInst::ICMP_SLT:
case CmpInst::FCMP_ULT:
- return ARMCC::LT;
+ Preds.first = ARMCC::LT;
+ break;
case CmpInst::ICMP_SLE:
case CmpInst::FCMP_ULE:
- return ARMCC::LE;
+ Preds.first = ARMCC::LE;
+ break;
case CmpInst::FCMP_UNE:
case CmpInst::ICMP_NE:
- return ARMCC::NE;
+ Preds.first = ARMCC::NE;
+ break;
case CmpInst::ICMP_UGE:
- return ARMCC::HS;
+ Preds.first = ARMCC::HS;
+ break;
case CmpInst::ICMP_ULT:
- return ARMCC::LO;
+ Preds.first = ARMCC::LO;
+ break;
+ default:
+ break;
}
+ assert(Preds.first != ARMCC::AL && "No comparisons needed?");
+ return Preds;
}
-bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB,
- const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const {
- auto &MBB = *MIB->getParent();
- auto InsertBefore = std::next(MIB->getIterator());
- auto &DebugLoc = MIB->getDebugLoc();
-
- // Move 0 into the result register.
- auto Mov0I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVi))
- .addDef(MRI.createVirtualRegister(&ARM::GPRRegClass))
- .addImm(0)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
- if (!constrainSelectedInstRegOperands(*Mov0I, TII, TRI, RBI))
+struct ARMInstructionSelector::CmpConstants {
+ CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank,
+ unsigned OpSize)
+ : ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode),
+ OperandRegBankID(OpRegBank), OperandSize(OpSize) {}
+
+ // The opcode used for performing the comparison.
+ const unsigned ComparisonOpcode;
+
+ // The opcode used for reading the flags set by the comparison. May be
+ // ARM::INSTRUCTION_LIST_END if we don't need to read the flags.
+ const unsigned ReadFlagsOpcode;
+
+ // The assumed register bank ID for the operands.
+ const unsigned OperandRegBankID;
+
+ // The assumed size in bits for the operands.
+ const unsigned OperandSize;
+};
+
+struct ARMInstructionSelector::InsertInfo {
+ InsertInfo(MachineInstrBuilder &MIB)
+ : MBB(*MIB->getParent()), InsertBefore(std::next(MIB->getIterator())),
+ DbgLoc(MIB->getDebugLoc()) {}
+
+ MachineBasicBlock &MBB;
+ const MachineBasicBlock::instr_iterator InsertBefore;
+ const DebugLoc &DbgLoc;
+};
+
+void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg,
+ unsigned Constant) const {
+ (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi))
+ .addDef(DestReg)
+ .addImm(Constant)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+}
+
+bool ARMInstructionSelector::validOpRegPair(MachineRegisterInfo &MRI,
+ unsigned LHSReg, unsigned RHSReg,
+ unsigned ExpectedSize,
+ unsigned ExpectedRegBankID) const {
+ return MRI.getType(LHSReg) == MRI.getType(RHSReg) &&
+ validReg(MRI, LHSReg, ExpectedSize, ExpectedRegBankID) &&
+ validReg(MRI, RHSReg, ExpectedSize, ExpectedRegBankID);
+}
+
+bool ARMInstructionSelector::validReg(MachineRegisterInfo &MRI, unsigned Reg,
+ unsigned ExpectedSize,
+ unsigned ExpectedRegBankID) const {
+ if (MRI.getType(Reg).getSizeInBits() != ExpectedSize) {
+ DEBUG(dbgs() << "Unexpected size for register");
return false;
+ }
- // Perform the comparison.
- auto LHSReg = MIB->getOperand(2).getReg();
- auto RHSReg = MIB->getOperand(3).getReg();
- assert(MRI.getType(LHSReg) == MRI.getType(RHSReg) &&
- MRI.getType(LHSReg).getSizeInBits() == 32 &&
- MRI.getType(RHSReg).getSizeInBits() == 32 &&
- "Unsupported types for comparison operation");
- auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPrr))
- .addUse(LHSReg)
- .addUse(RHSReg)
- .add(predOps(ARMCC::AL));
- if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ if (RBI.getRegBank(Reg, MRI, TRI)->getID() != ExpectedRegBankID) {
+ DEBUG(dbgs() << "Unexpected register bank for register");
return false;
+ }
+
+ return true;
+}
+
+bool ARMInstructionSelector::selectCmp(CmpConstants Helper,
+ MachineInstrBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ const InsertInfo I(MIB);
- // Move 1 into the result register if the flags say so.
auto ResReg = MIB->getOperand(0).getReg();
+ if (!validReg(MRI, ResReg, 1, ARM::GPRRegBankID))
+ return false;
+
auto Cond =
static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate());
- auto ARMCond = getComparePred(Cond);
- if (ARMCond == ARMCC::AL)
+ if (Cond == CmpInst::FCMP_TRUE || Cond == CmpInst::FCMP_FALSE) {
+ putConstant(I, ResReg, Cond == CmpInst::FCMP_TRUE ? 1 : 0);
+ MIB->eraseFromParent();
+ return true;
+ }
+
+ auto LHSReg = MIB->getOperand(2).getReg();
+ auto RHSReg = MIB->getOperand(3).getReg();
+ if (!validOpRegPair(MRI, LHSReg, RHSReg, Helper.OperandSize,
+ Helper.OperandRegBankID))
return false;
- auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCi))
+ auto ARMConds = getComparePreds(Cond);
+ auto ZeroReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ putConstant(I, ZeroReg, 0);
+
+ if (ARMConds.second == ARMCC::AL) {
+ // Simple case, we only need one comparison and we're done.
+ if (!insertComparison(Helper, I, ResReg, ARMConds.first, LHSReg, RHSReg,
+ ZeroReg))
+ return false;
+ } else {
+ // Not so simple, we need two successive comparisons.
+ auto IntermediateRes = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ if (!insertComparison(Helper, I, IntermediateRes, ARMConds.first, LHSReg,
+ RHSReg, ZeroReg))
+ return false;
+ if (!insertComparison(Helper, I, ResReg, ARMConds.second, LHSReg, RHSReg,
+ IntermediateRes))
+ return false;
+ }
+
+ MIB->eraseFromParent();
+ return true;
+}
+
+bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I,
+ unsigned ResReg,
+ ARMCC::CondCodes Cond,
+ unsigned LHSReg, unsigned RHSReg,
+ unsigned PrevRes) const {
+ // Perform the comparison.
+ auto CmpI =
+ BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Helper.ComparisonOpcode))
+ .addUse(LHSReg)
+ .addUse(RHSReg)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ return false;
+
+ // Read the comparison flags (if necessary).
+ if (Helper.ReadFlagsOpcode != ARM::INSTRUCTION_LIST_END) {
+ auto ReadI = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc,
+ TII.get(Helper.ReadFlagsOpcode))
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*ReadI, TII, TRI, RBI))
+ return false;
+ }
+
+ // Select either 1 or the previous result based on the value of the flags.
+ auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi))
.addDef(ResReg)
- .addUse(Mov0I->getOperand(0).getReg())
+ .addUse(PrevRes)
.addImm(1)
- .add(predOps(ARMCond, ARM::CPSR));
+ .add(predOps(Cond, ARM::CPSR));
if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI))
return false;
- MIB->eraseFromParent();
return true;
}
bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
- const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const {
+ MachineRegisterInfo &MRI) const {
auto &MBB = *MIB->getParent();
auto InsertBefore = std::next(MIB->getIterator());
- auto &DebugLoc = MIB->getDebugLoc();
+ auto &DbgLoc = MIB->getDebugLoc();
// Compare the condition to 0.
auto CondReg = MIB->getOperand(1).getReg();
- assert(MRI.getType(CondReg).getSizeInBits() == 1 &&
- RBI.getRegBank(CondReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) &&
"Unsupported types for select operation");
- auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPri))
+ auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri))
.addUse(CondReg)
.addImm(0)
.add(predOps(ARMCC::AL));
@@ -376,13 +510,10 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
auto ResReg = MIB->getOperand(0).getReg();
auto TrueReg = MIB->getOperand(2).getReg();
auto FalseReg = MIB->getOperand(3).getReg();
- assert(MRI.getType(ResReg) == MRI.getType(TrueReg) &&
- MRI.getType(TrueReg) == MRI.getType(FalseReg) &&
- MRI.getType(FalseReg).getSizeInBits() == 32 &&
- RBI.getRegBank(TrueReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
- RBI.getRegBank(FalseReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) &&
+ validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) &&
"Unsupported types for select operation");
- auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCr))
+ auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr))
.addDef(ResReg)
.addUse(TrueReg)
.addUse(FalseReg)
@@ -494,10 +625,32 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(COPY));
return selectCopy(I, TII, MRI, TRI, RBI);
}
- case G_ICMP:
- return selectICmp(MIB, TII, MRI, TRI, RBI);
case G_SELECT:
- return selectSelect(MIB, TII, MRI, TRI, RBI);
+ return selectSelect(MIB, MRI);
+ case G_ICMP: {
+ CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
+ ARM::GPRRegBankID, 32);
+ return selectCmp(Helper, MIB, MRI);
+ }
+ case G_FCMP: {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP");
+
+ unsigned OpReg = I.getOperand(2).getReg();
+ unsigned Size = MRI.getType(OpReg).getSizeInBits();
+
+ if (Size == 64 && TII.getSubtarget().isFPOnlySP()) {
+ DEBUG(dbgs() << "Subtarget only supports single precision");
+ return false;
+ }
+ if (Size != 32 && Size != 64) {
+ DEBUG(dbgs() << "Unsupported size for G_FCMP operand");
+ return false;
+ }
+
+ CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
+ ARM::FPRRegBankID, Size);
+ return selectCmp(Helper, MIB, MRI);
+ }
case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
@@ -510,11 +663,10 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
break;
case G_CONSTANT: {
unsigned Reg = I.getOperand(0).getReg();
- if (MRI.getType(Reg).getSizeInBits() != 32)
+
+ if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID))
return false;
- assert(RBI.getRegBank(Reg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
- "Expected constant to live in a GPR");
I.setDesc(TII.get(ARM::MOVi));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
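
FCMP_ONE and FCMP_UEQ are the predicates that need the two-element condition pairs above: after a VFP compare plus FMSTAT, GT reads as ordered greater-than, MI as ordered less-than, EQ as equal and VS as unordered, and the two partial results are OR-ed via the PrevRes chaining in insertComparison. A plain-C++ model of the composed predicates (standard library only, nothing ARM-specific):

#include <cassert>
#include <cmath>

static bool one(float A, float B) {
  return (A > B) || (A < B);                         // GT || MI
}
static bool ueq(float A, float B) {
  return (A == B) || std::isnan(A) || std::isnan(B); // EQ || VS
}

int main() {
  assert(one(1.0f, 2.0f) && !one(2.0f, 2.0f) && !one(NAN, 2.0f));
  assert(ueq(2.0f, 2.0f) && ueq(NAN, 2.0f) && !ueq(1.0f, 2.0f));
  return 0;
}
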
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index f3e62d09cc30a..f23e62595d2e9 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -28,6 +28,10 @@ using namespace llvm;
#error "You shouldn't build this"
#endif
+static bool AEABI(const ARMSubtarget &ST) {
+ return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI();
+}
+
ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
using namespace TargetOpcode;
@@ -66,8 +70,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
for (unsigned Op : {G_SREM, G_UREM})
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Lower);
- else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() ||
- ST.isTargetMuslAEABI())
+ else if (AEABI(ST))
setAction({Op, s32}, Custom);
else
setAction({Op, s32}, Libcall);
@@ -86,6 +89,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_SELECT, 1, s1}, Legal);
setAction({G_CONSTANT, s32}, Legal);
+ for (auto Ty : {s1, s8, s16})
+ setAction({G_CONSTANT, Ty}, WidenScalar);
setAction({G_ICMP, s1}, Legal);
for (auto Ty : {s8, s16})
@@ -99,9 +104,22 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_LOAD, s64}, Legal);
setAction({G_STORE, s64}, Legal);
+
+ setAction({G_FCMP, s1}, Legal);
+ setAction({G_FCMP, 1, s32}, Legal);
+ setAction({G_FCMP, 1, s64}, Legal);
} else {
for (auto Ty : {s32, s64})
setAction({G_FADD, Ty}, Libcall);
+
+ setAction({G_FCMP, s1}, Legal);
+ setAction({G_FCMP, 1, s32}, Custom);
+ setAction({G_FCMP, 1, s64}, Custom);
+
+ if (AEABI(ST))
+ setFCmpLibcallsAEABI();
+ else
+ setFCmpLibcallsGNU();
}
for (unsigned Op : {G_FREM, G_FPOW})
@@ -111,11 +129,120 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
computeTables();
}
+void ARMLegalizerInfo::setFCmpLibcallsAEABI() {
+ // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be
+ // default-initialized.
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp32Libcalls[CmpInst::FCMP_OEQ] = {
+ {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OGE] = {
+ {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OGT] = {
+ {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OLE] = {
+ {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OLT] = {
+ {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UNO] = {
+ {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_ONE] = {
+ {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_UEQ] = {
+ {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}};
+
+ FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp64Libcalls[CmpInst::FCMP_OEQ] = {
+ {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OGE] = {
+ {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OGT] = {
+ {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OLE] = {
+ {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OLT] = {
+ {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UNO] = {
+ {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_ONE] = {
+ {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_UEQ] = {
+ {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}};
+}
+
+void ARMLegalizerInfo::setFCmpLibcallsGNU() {
+ // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be
+ // default-initialized.
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}};
+ FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}};
+ FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}};
+ FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}};
+ FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}};
+ FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}};
+ FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}};
+ FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}};
+ FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}};
+ FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}};
+ FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT},
+ {RTLIB::OLT_F32, CmpInst::ICMP_SLT}};
+ FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ},
+ {RTLIB::UO_F32, CmpInst::ICMP_NE}};
+
+ FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}};
+ FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}};
+ FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}};
+ FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}};
+ FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}};
+ FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}};
+ FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}};
+ FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}};
+ FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}};
+ FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}};
+ FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT},
+ {RTLIB::OLT_F64, CmpInst::ICMP_SLT}};
+ FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ},
+ {RTLIB::UO_F64, CmpInst::ICMP_NE}};
+}
+
+ARMLegalizerInfo::FCmpLibcallsList
+ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate,
+ unsigned Size) const {
+ assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate");
+ if (Size == 32)
+ return FCmp32Libcalls[Predicate];
+ if (Size == 64)
+ return FCmp64Libcalls[Predicate];
+ llvm_unreachable("Unsupported size for FCmp predicate");
+}
+
bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
using namespace TargetOpcode;
+ MIRBuilder.setInstr(MI);
+
switch (MI.getOpcode()) {
default:
return false;
@@ -137,9 +264,9 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
auto RetVal = MRI.createGenericVirtualRegister(
getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));
- auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy},
- {{MI.getOperand(1).getReg(), ArgTy},
- {MI.getOperand(2).getReg(), ArgTy}});
+ auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy},
+ {{MI.getOperand(1).getReg(), ArgTy},
+ {MI.getOperand(2).getReg(), ArgTy}});
if (Status != LegalizerHelper::Legalized)
return false;
@@ -149,8 +276,76 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
MIRBuilder.buildUnmerge(
{MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
RetVal);
+ break;
+ }
+ case G_FCMP: {
+ assert(MRI.getType(MI.getOperand(2).getReg()) ==
+ MRI.getType(MI.getOperand(3).getReg()) &&
+ "Mismatched operands for G_FCMP");
+ auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+
+ auto OriginalResult = MI.getOperand(0).getReg();
+ auto Predicate =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ auto Libcalls = getFCmpLibcalls(Predicate, OpSize);
+
+ if (Libcalls.empty()) {
+ assert((Predicate == CmpInst::FCMP_TRUE ||
+ Predicate == CmpInst::FCMP_FALSE) &&
+ "Predicate needs libcalls, but none specified");
+ MIRBuilder.buildConstant(OriginalResult,
+ Predicate == CmpInst::FCMP_TRUE ? 1 : 0);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size");
+ auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+ auto *RetTy = Type::getInt32Ty(Ctx);
+
+ SmallVector<unsigned, 2> Results;
+ for (auto Libcall : Libcalls) {
+ auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ auto Status =
+ createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy},
+ {{MI.getOperand(2).getReg(), ArgTy},
+ {MI.getOperand(3).getReg(), ArgTy}});
+
+ if (Status != LegalizerHelper::Legalized)
+ return false;
- return LegalizerHelper::Legalized;
+ auto ProcessedResult =
+ Libcalls.size() == 1
+ ? OriginalResult
+ : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult));
+
+ // We have a result, but we need to transform it into a proper 1-bit 0 or
+ // 1, taking into account the different peculiarities of the values
+ // returned by the comparison functions.
+ CmpInst::Predicate ResultPred = Libcall.Predicate;
+ if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) {
+ // We have a nice 0 or 1, and we just need to truncate it back to 1 bit
+ // to keep the types consistent.
+ MIRBuilder.buildTrunc(ProcessedResult, LibcallResult);
+ } else {
+ // We need to compare against 0.
+ assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate");
+ auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ MIRBuilder.buildConstant(Zero, 0);
+ MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero);
+ }
+ Results.push_back(ProcessedResult);
+ }
+
+ if (Results.size() != 1) {
+ assert(Results.size() == 2 && "Unexpected number of results");
+ MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]);
+ }
+ break;
}
}
+
+ MI.eraseFromParent();
+ return true;
}
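
The G_FCMP lowering above maps each floating-point predicate to one or two
runtime comparisons, then normalizes every libcall result to a single bit.
A rough host-side sketch of what the emitted sequence computes for FCMP_ONE
under the GNU libcall table, assuming the libgcc soft-float helpers
__gtsf2/__ltsf2 with their usual conventions (result > 0 iff a > b,
result < 0 iff a < b); the standalone framing is illustrative, not the
legalizer's actual output:

  extern "C" int __gtsf2(float, float); // > 0 iff a > b (libgcc convention).
  extern "C" int __ltsf2(float, float); // < 0 iff a < b (libgcc convention).

  // fcmp one (ordered, not equal) == (a > b) || (a < b).
  static bool fcmp_one(float A, float B) {
    bool Gt = __gtsf2(A, B) > 0; // ICMP_SGT against zero, as in the table.
    bool Lt = __ltsf2(A, B) < 0; // ICMP_SLT against zero.
    return Gt || Lt;             // The two 1-bit results get OR'ed.
  }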
diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h
index a9bdd367737e5..78ab9412c04ba 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/lib/Target/ARM/ARMLegalizerInfo.h
@@ -14,7 +14,10 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
#define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
+#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/IR/Instructions.h"
namespace llvm {
@@ -27,6 +30,36 @@ public:
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const override;
+
+private:
+ void setFCmpLibcallsGNU();
+ void setFCmpLibcallsAEABI();
+
+ struct FCmpLibcallInfo {
+ // Which libcall this is.
+ RTLIB::Libcall LibcallID;
+
+ // The predicate to be used when comparing the value returned by the
+ // function with a relevant constant (currently hard-coded to zero). This is
+ // necessary because often the libcall will return e.g. a value greater than
+ // 0 to represent 'true' and anything negative to represent 'false', or
+ // maybe 0 to represent 'true' and non-zero for 'false'. If no comparison is
+ // needed, this should be CmpInst::BAD_ICMP_PREDICATE.
+ CmpInst::Predicate Predicate;
+ };
+ using FCmpLibcallsList = SmallVector<FCmpLibcallInfo, 2>;
+
+ // Map from each FCmp predicate to the corresponding libcall infos. An FCmp
+ // instruction may be lowered to one or two libcalls, which is why we need a
+ // list. If two libcalls are needed, their results will be OR'ed.
+ using FCmpLibcallsMapTy = IndexedMap<FCmpLibcallsList>;
+
+ FCmpLibcallsMapTy FCmp32Libcalls;
+ FCmpLibcallsMapTy FCmp64Libcalls;
+
+ // Get the libcall(s) corresponding to \p Predicate for operands of \p Size
+ // bits.
+ FCmpLibcallsList getFCmpLibcalls(CmpInst::Predicate, unsigned Size) const;
};
} // End llvm namespace.
#endif
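
The AEABI entries at the top of this section all carry
CmpInst::BAD_ICMP_PREDICATE because the RTABI comparison helpers already
return exactly 0 or 1, so the legalizer only truncates. A sketch of the
FCMP_UEQ case (unordered or equal), with illustrative declarations of the
assumed __aeabi_fcmpeq/__aeabi_fcmpun helpers:

  extern "C" int __aeabi_fcmpeq(float, float); // 1 iff a == b, else 0.
  extern "C" int __aeabi_fcmpun(float, float); // 1 iff a, b unordered, else 0.

  static bool fcmp_ueq(float A, float B) {
    // Each result is already 0/1, so no icmp is needed; OR the two bits.
    return __aeabi_fcmpeq(A, B) || __aeabi_fcmpun(A, B);
  }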
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 11fb81a4f9fea..c0c09e8c15afd 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -212,8 +212,6 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-
unsigned NumOperands = MI.getNumOperands();
const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
@@ -236,26 +234,31 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
break;
case G_LOAD:
- case G_STORE:
+ case G_STORE: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
OperandsMapping =
Ty.getSizeInBits() == 64
? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
&ARM::ValueMappings[ARM::GPR3OpsIdx]})
: &ARM::ValueMappings[ARM::GPR3OpsIdx];
break;
- case G_FADD:
+ }
+ case G_FADD: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) &&
"Unsupported size for G_FADD");
OperandsMapping = Ty.getSizeInBits() == 64
? &ARM::ValueMappings[ARM::DPR3OpsIdx]
: &ARM::ValueMappings[ARM::SPR3OpsIdx];
break;
+ }
case G_CONSTANT:
case G_FRAME_INDEX:
OperandsMapping =
getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
case G_SELECT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
LLT Ty2 = MRI.getType(MI.getOperand(1).getReg());
(void)Ty2;
assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT");
@@ -277,9 +280,29 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
&ARM::ValueMappings[ARM::GPR3OpsIdx]});
break;
}
+ case G_FCMP: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ LLT Ty1 = MRI.getType(MI.getOperand(2).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+ (void)Ty2;
+ assert(Ty.getSizeInBits() == 1 && "Unsupported size for G_FCMP");
+ assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() &&
+ "Mismatched operand sizes for G_FCMP");
+
+ unsigned Size = Ty1.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP");
+
+ auto FPRValueMapping = Size == 32 ? &ARM::ValueMappings[ARM::SPR3OpsIdx]
+ : &ARM::ValueMappings[ARM::DPR3OpsIdx];
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
+ FPRValueMapping, FPRValueMapping});
+ break;
+ }
case G_MERGE_VALUES: {
// We only support G_MERGE_VALUES for creating a double precision floating
// point value out of two GPRs.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 ||
@@ -294,6 +317,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_UNMERGE_VALUES: {
// We only support G_UNMERGE_VALUES for splitting a double precision
// floating point value into two GPRs.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 ||
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 8eb9dbf5f9de6..51b0fedd2b54f 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -15,6 +15,24 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
+bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ // To inline a callee, all features not in the whitelist must match exactly.
+ bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
+ (CalleeBits & ~InlineFeatureWhitelist);
+ // For features in the whitelist, the callee's features must be a subset of
+ // the caller's.
+ bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
+ (CalleeBits & InlineFeatureWhitelist);
+ return MatchExact && MatchSubset;
+}
+
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
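
areInlineCompatible splits the feature bits in two: whitelisted features,
where the callee may use at most what the caller has, and everything else,
which must match exactly. A self-contained sketch with made-up 8-bit
feature sets (the real code uses FeatureBitset):

  #include <bitset>
  #include <cassert>

  int main() {
    std::bitset<8> Caller("10110010"), Callee("10110000"), White("00000011");
    bool MatchExact  = (Caller & ~White) == (Callee & ~White);
    bool MatchSubset = ((Caller & Callee) & White) == (Callee & White);
    assert(MatchExact && MatchSubset); // Callee may be inlined into Caller.
    return 0;
  }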
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 8a1a378638779..0695a4e633467 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -33,6 +33,39 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMSubtarget *ST;
const ARMTargetLowering *TLI;
+ // Currently the following features are excluded from InlineFeatureWhitelist:
+ // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16.
+ // Depending on whether they are set or unset, different
+ // instructions/registers are available. For example, inlining a callee with
+ // -thumb-mode into a caller with +thumb-mode may cause the assembler to
+ // fail if the callee uses ARM-only instructions, e.g. in inline asm.
+ const FeatureBitset InlineFeatureWhitelist = {
+ ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
+ ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
+ ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
+ ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
+ ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
+ ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
+ ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
+ ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
+ ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
+ ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
+ ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
+ ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
+ ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
+ ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
+ ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
+ ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding,
+ ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR,
+ ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp,
+ ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor,
+ ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
+ ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
+ ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
+ ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
+ ARM::FeatureNoNegativeImmediates
+ };
+
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
@@ -41,6 +74,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
+
bool enableInterleavedAccessVectorization() { return true; }
/// Floating-point computation using ARMv8 AArch32 Advanced
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 891b5c60e1fd6..1129826f21f64 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -5249,6 +5249,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Fall through for the Identifier case that is not a register or a
// special name.
+ LLVM_FALLTHROUGH;
}
case AsmToken::LParen: // parenthesized expressions like (_strcmp-4)
case AsmToken::Integer: // things like 1f and 2b as a branch targets
@@ -8992,6 +8993,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst,
return PlainMatchResult;
}
+std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS);
+
static const char *getSubtargetFeatureName(uint64_t Val);
bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
@@ -9085,9 +9088,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(ErrorLoc, "invalid operand for instruction");
}
- case Match_MnemonicFail:
- return Error(IDLoc, "invalid instruction",
+ case Match_MnemonicFail: {
+ uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = ARMMnemonicSpellCheck(
+ ((ARMOperand &)*Operands[0]).getToken(), FBS);
+ return Error(IDLoc, "invalid instruction" + Suggestion,
((ARMOperand &)*Operands[0]).getLocRange());
+ }
case Match_RequiresNotITBlock:
return Error(IDLoc, "flag setting instruction only valid outside IT block");
case Match_RequiresITBlock:
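
ARMMnemonicSpellCheck is TableGen-generated; a naive stand-in built on edit
distance shows the shape of the suggestion appended to the error above. The
threshold and message text here are illustrative, not the generated
implementation:

  #include <algorithm>
  #include <string>
  #include <vector>

  static size_t editDistance(const std::string &A, const std::string &B) {
    std::vector<size_t> Row(B.size() + 1);
    for (size_t j = 0; j <= B.size(); ++j)
      Row[j] = j;
    for (size_t i = 1; i <= A.size(); ++i) {
      size_t Prev = Row[0]++; // Prev holds dist(i-1, j-1).
      for (size_t j = 1; j <= B.size(); ++j) {
        size_t Cur = std::min({Row[j] + 1, Row[j - 1] + 1,
                               Prev + (A[i - 1] != B[j - 1])});
        Prev = Row[j];
        Row[j] = Cur;
      }
    }
    return Row[B.size()];
  }

  static std::string suggestMnemonic(const std::string &S,
                                     const std::vector<std::string> &Known) {
    std::string Best;
    size_t BestDist = 3; // Only suggest reasonably close matches.
    for (const std::string &K : Known) {
      size_t D = editDistance(S, K);
      if (D < BestDist) {
        BestDist = D;
        Best = K;
      }
    }
    return Best.empty() ? "" : ", did you mean '" + Best + "'?";
  }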
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 22de728fe06e1..a77df7a2598f4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -361,9 +361,8 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
const MCFixup &Fixup,
const MCValue &Target, uint64_t Value,
- bool IsPCRel, MCContext &Ctx,
- bool IsLittleEndian,
- bool IsResolved) const {
+ bool IsResolved, MCContext &Ctx,
+ bool IsLittleEndian) const {
unsigned Kind = Fixup.getKind();
// MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT
@@ -392,7 +391,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
case FK_SecRel_4:
return Value;
case ARM::fixup_arm_movt_hi16:
- if (!IsPCRel)
+ if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_arm_movw_lo16: {
@@ -404,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
return Value;
}
case ARM::fixup_t2_movt_hi16:
- if (!IsPCRel)
+ if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_t2_movw_lo16: {
@@ -885,11 +884,11 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
- bool IsPCRel) const {
+ bool IsResolved) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
MCContext &Ctx = Asm.getContext();
- Value = adjustFixupValue(Asm, Fixup, Target, Value, IsPCRel, Ctx,
- IsLittleEndian, true);
+ Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx,
+ IsLittleEndian);
if (!Value)
return; // Doesn't change encoding.
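
For the MOVW/MOVT pairs above, the hi16 fixup kinds shift the value down by
16 once it is fully resolved (or when the object format is not ELF, where
the relocation itself extracts the half). A trivial sketch of the split:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t Value = 0x12345678;
    uint32_t Hi = Value >> 16;    // What the MOVT half encodes once resolved.
    uint32_t Lo = Value & 0xffff; // What the MOVW half encodes.
    assert(Hi == 0x1234 && Lo == 0x5678);
    return 0;
  }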
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 84b54bbb9a49b..02374966dafe7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -42,13 +42,13 @@ public:
const MCValue &Target) override;
unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, uint64_t Value, bool IsPCRel,
- MCContext &Ctx, bool IsLittleEndian,
- bool IsResolved) const;
+ const MCValue &Target, uint64_t Value,
+ bool IsResolved, MCContext &Ctx,
+ bool IsLittleEndian) const;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsResolved) const override;
unsigned getRelaxedOpcode(unsigned Op) const;
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 0b6574c37de12..5709b4e617987 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -236,7 +236,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R12:
if (STI.splitFramePushPop(MF))
break;
- // fallthough
+ LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2:
diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp
index f0c7b11895b4a..c058c9e1f5348 100644
--- a/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -149,7 +149,10 @@ bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
(void)MO;
assert(MO.isReg() && "Unexpected inline asm memory operand");
- // TODO: We can look up the alternative name for the register if it's given.
+ // TODO: We should be able to look up the alternative name for
+ // the register if it's given.
+ // TableGen doesn't expose a way of retrieving alternative names
+ // for registers.
if (MI->getOperand(OpNum).getReg() == AVR::R31R30) {
O << "Z";
} else {
diff --git a/lib/Target/AVR/AVRDevices.td b/lib/Target/AVR/AVRDevices.td
index 9224af613d148..62def45744372 100644
--- a/lib/Target/AVR/AVRDevices.td
+++ b/lib/Target/AVR/AVRDevices.td
@@ -6,7 +6,6 @@
// :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD.
// In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`.
// avr2 (with SRAM) adds the rest of the variants.
-// :TODO: s/AVRTiny/Tiny
// A feature set aggregates features, grouping them. We don't want to create a
@@ -136,7 +135,7 @@ def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">;
def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">;
def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">;
def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">;
-def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
+def ELFArchTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">;
def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">;
def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">;
@@ -189,7 +188,7 @@ def FamilyAVR51 : Family<"avr51",
def FamilyAVR6 : Family<"avr6",
[FamilyAVR51]>;
-def FamilyAVRTiny : Family<"avrtiny",
+def FamilyTiny : Family<"avrtiny",
[FamilyAVR0, FeatureBREAK, FeatureSRAM,
FeatureTinyEncoding]>;
@@ -240,7 +239,7 @@ def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
-def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"avrtiny", FamilyTiny, ELFArchTiny>;
// Specific MCUs
def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
@@ -480,12 +479,12 @@ def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>;
def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>;
def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>;
def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>;
-def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny102", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny104", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny4", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny5", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny9", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny10", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny20", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny40", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny102", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny104", FamilyTiny, ELFArchTiny>;
diff --git a/lib/Target/AVR/AVRInstrInfo.cpp b/lib/Target/AVR/AVRInstrInfo.cpp
index afba66b2e69bb..744aa723c416c 100644
--- a/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/lib/Target/AVR/AVRInstrInfo.cpp
@@ -402,7 +402,7 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
const DebugLoc &DL,
int *BytesAdded) const {
- assert(!BytesAdded && "code size not handled");
+ if (BytesAdded) *BytesAdded = 0;
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
@@ -411,19 +411,24 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
if (Cond.empty()) {
assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(TBB);
+ auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(TBB);
+ if (BytesAdded)
+ *BytesAdded += getInstSizeInBytes(MI);
return 1;
}
// Conditional branch.
unsigned Count = 0;
AVRCC::CondCodes CC = (AVRCC::CondCodes)Cond[0].getImm();
- BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);
+ auto &CondMI = *BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);
+
+ if (BytesAdded) *BytesAdded += getInstSizeInBytes(CondMI);
++Count;
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
- BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB);
+ auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB);
+ if (BytesAdded) *BytesAdded += getInstSizeInBytes(MI);
++Count;
}
@@ -432,7 +437,7 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
- assert(!BytesRemoved && "code size not handled");
+ if (BytesRemoved) *BytesRemoved = 0;
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -450,6 +455,7 @@ unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
}
// Remove the branch.
+ if (BytesRemoved) *BytesRemoved += getInstSizeInBytes(*I);
I->eraseFromParent();
I = MBB.end();
++Count;
@@ -494,5 +500,61 @@ unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
}
}
+MachineBasicBlock *
+AVRInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected opcode!");
+ case AVR::JMPk:
+ case AVR::CALLk:
+ case AVR::RCALLk:
+ case AVR::RJMPk:
+ case AVR::BREQk:
+ case AVR::BRNEk:
+ case AVR::BRSHk:
+ case AVR::BRLOk:
+ case AVR::BRMIk:
+ case AVR::BRPLk:
+ case AVR::BRGEk:
+ case AVR::BRLTk:
+ return MI.getOperand(0).getMBB();
+ case AVR::BRBSsk:
+ case AVR::BRBCsk:
+ return MI.getOperand(1).getMBB();
+ case AVR::SBRCRrB:
+ case AVR::SBRSRrB:
+ case AVR::SBICAb:
+ case AVR::SBISAb:
+ llvm_unreachable("unimplemented branch instructions");
+ }
+}
+
+bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
+ int64_t BrOffset) const {
+
+ switch (BranchOp) {
+ default:
+ llvm_unreachable("unexpected opcode!");
+ case AVR::JMPk:
+ case AVR::CALLk:
+ assert(BrOffset >= 0 && "offset must be absolute address");
+ return isUIntN(16, BrOffset);
+ case AVR::RCALLk:
+ case AVR::RJMPk:
+ return isIntN(13, BrOffset);
+ case AVR::BRBSsk:
+ case AVR::BRBCsk:
+ case AVR::BREQk:
+ case AVR::BRNEk:
+ case AVR::BRSHk:
+ case AVR::BRLOk:
+ case AVR::BRMIk:
+ case AVR::BRPLk:
+ case AVR::BRGEk:
+ case AVR::BRLTk:
+ return isIntN(7, BrOffset);
+ }
+}
+
} // end of namespace llvm
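
isBranchOffsetInRange above reduces to N-bit range checks: a 16-bit unsigned
immediate for the absolute JMPk/CALLk targets, 13 signed bits for
RJMPk/RCALLk, and 7 signed bits for the short conditional branches. A local
re-implementation of the predicates, mirroring what llvm::isIntN/isUIntN
compute for N < 64:

  #include <cassert>
  #include <cstdint>

  static bool isIntN(unsigned N, int64_t X) {
    return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
  }
  static bool isUIntN(unsigned N, int64_t X) {
    return X >= 0 && X < (INT64_C(1) << N);
  }

  int main() {
    assert(isUIntN(16, 65535) && !isUIntN(16, 65536)); // JMPk / CALLk
    assert(isIntN(13, -4096) && !isIntN(13, 4096));    // RJMPk / RCALLk
    assert(isIntN(7, 63) && !isIntN(7, 64));           // BR*k
    return 0;
  }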
diff --git a/lib/Target/AVR/AVRInstrInfo.h b/lib/Target/AVR/AVRInstrInfo.h
index c5105dafe5eb5..f42d34fb28480 100644
--- a/lib/Target/AVR/AVRInstrInfo.h
+++ b/lib/Target/AVR/AVRInstrInfo.h
@@ -103,6 +103,10 @@ public:
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+
+ bool isBranchOffsetInRange(unsigned BranchOpc,
+ int64_t BrOffset) const override;
private:
const AVRRegisterInfo RI;
};
diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td
index 5dd8b2c27b212..184e4d53f7c8f 100644
--- a/lib/Target/AVR/AVRInstrInfo.td
+++ b/lib/Target/AVR/AVRInstrInfo.td
@@ -1411,17 +1411,11 @@ hasSideEffects = 0 in
def LPMRdZ : FLPMX<0,
0,
(outs GPR8:$dst),
- (ins ZREGS:$z),
+ (ins ZREG:$z),
"lpm\t$dst, $z",
[]>,
Requires<[HasLPMX]>;
- def LPMWRdZ : Pseudo<(outs DREGS:$dst),
- (ins ZREGS:$z),
- "lpmw\t$dst, $z",
- []>,
- Requires<[HasLPMX]>;
-
// Load program memory, while postincrementing the Z register.
let mayLoad = 1,
Defs = [R31R30] in
@@ -1429,13 +1423,19 @@ hasSideEffects = 0 in
def LPMRdZPi : FLPMX<0,
1,
(outs GPR8:$dst),
- (ins ZREGS:$z),
+ (ins ZREG:$z),
"lpm\t$dst, $z+",
[]>,
Requires<[HasLPMX]>;
+ def LPMWRdZ : Pseudo<(outs DREGS:$dst),
+ (ins ZREG:$z),
+ "lpmw\t$dst, $z",
+ []>,
+ Requires<[HasLPMX]>;
+
def LPMWRdZPi : Pseudo<(outs DREGS:$dst),
- (ins ZREGS:$z),
+ (ins ZREG:$z),
"lpmw\t$dst, $z+",
[]>,
Requires<[HasLPMX]>;
@@ -1458,7 +1458,7 @@ hasSideEffects = 0 in
def ELPMRdZ : FLPMX<1,
0,
(outs GPR8:$dst),
- (ins ZREGS:$z),
+ (ins ZREG:$z),
"elpm\t$dst, $z",
[]>,
Requires<[HasELPMX]>;
@@ -1467,7 +1467,7 @@ hasSideEffects = 0 in
def ELPMRdZPi : FLPMX<1,
1,
(outs GPR8:$dst),
- (ins ZREGS: $z),
+ (ins ZREG:$z),
"elpm\t$dst, $z+",
[]>,
Requires<[HasELPMX]>;
@@ -1487,7 +1487,7 @@ let Uses = [R1, R0] in
let Defs = [R31R30] in
def SPMZPi : F16<0b1001010111111000,
(outs),
- (ins ZREGS:$z),
+ (ins ZREG:$z),
"spm $z+",
[]>,
Requires<[HasSPMX]>;
@@ -1564,28 +1564,28 @@ hasSideEffects = 0 in
// Read-Write-Modify (RMW) instructions.
def XCHZRd : FZRd<0b100,
(outs GPR8:$rd),
- (ins ZREGS:$z),
+ (ins ZREG:$z),
"xch\t$z, $rd",
[]>,
Requires<[SupportsRMW]>;
def LASZRd : FZRd<0b101,
(outs GPR8:$rd),
- (ins ZREGS:$z),
+ (ins ZREG:$z),
"las\t$z, $rd",
[]>,
Requires<[SupportsRMW]>;
def LACZRd : FZRd<0b110,
(outs GPR8:$rd),
- (ins ZREGS:$z),
+ (ins ZREG:$z),
"lac\t$z, $rd",
[]>,
Requires<[SupportsRMW]>;
def LATZRd : FZRd<0b111,
(outs GPR8:$rd),
- (ins ZREGS:$z),
+ (ins ZREG:$z),
"lat\t$z, $rd",
[]>,
Requires<[SupportsRMW]>;
diff --git a/lib/Target/AVR/AVRMCInstLower.cpp b/lib/Target/AVR/AVRMCInstLower.cpp
index 475dda420e892..dfefd09bc4b86 100644
--- a/lib/Target/AVR/AVRMCInstLower.cpp
+++ b/lib/Target/AVR/AVRMCInstLower.cpp
@@ -37,10 +37,22 @@ MCOperand AVRMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
}
+ bool IsFunction = MO.isGlobal() && isa<Function>(MO.getGlobal());
+
if (TF & AVRII::MO_LO) {
- Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_LO8, Expr, IsNegated, Ctx);
+ if (IsFunction) {
+ // N.B. Should we use _GS fixups here to cope with >128k progmem?
+ Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_PM_LO8, Expr, IsNegated, Ctx);
+ } else {
+ Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_LO8, Expr, IsNegated, Ctx);
+ }
} else if (TF & AVRII::MO_HI) {
- Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_HI8, Expr, IsNegated, Ctx);
+ if (IsFunction) {
+ // N.B. Should we use _GS fixups here to cope with >128k progmem?
+ Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_PM_HI8, Expr, IsNegated, Ctx);
+ } else {
+ Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_HI8, Expr, IsNegated, Ctx);
+ }
} else if (TF != 0) {
llvm_unreachable("Unknown target flag on symbol operand");
}
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index 55f3f5cf428ac..249dc5512c289 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -95,7 +95,8 @@ AVRRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
}
/// Fold a frame offset shared between two add instructions into a single one.
-static void foldFrameOffset(MachineInstr &MI, int &Offset, unsigned DstReg) {
+static void foldFrameOffset(MachineBasicBlock::iterator &II, int &Offset,
+                            unsigned DstReg) {
+ MachineInstr &MI = *II;
int Opcode = MI.getOpcode();
// Don't bother trying if the next instruction is not an add or a sub.
@@ -120,6 +121,7 @@ static void foldFrameOffset(MachineInstr &MI, int &Offset, unsigned DstReg) {
}
// Finally remove the instruction.
+ II++;
MI.eraseFromParent();
}
@@ -158,6 +160,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned DstReg = MI.getOperand(0).getReg();
assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer");
+ II++; // Skip over the FRMIDX (and now MOVW) instruction.
+
// Generally, to load a frame address two add instructions are emitted that
// could get folded into a single one:
// movw r31:r30, r29:r28
@@ -166,7 +170,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// to:
// movw r31:r30, r29:r28
// adiw r31:r30, 45
- foldFrameOffset(*std::next(II), Offset, DstReg);
+ if (II != MBB.end())
+ foldFrameOffset(II, Offset, DstReg);
// Select the best opcode based on DstReg and the offset size.
switch (DstReg) {
@@ -187,7 +192,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
- MachineInstr *New = BuildMI(MBB, std::next(II), dl, TII.get(Opcode), DstReg)
+ MachineInstr *New = BuildMI(MBB, II, dl, TII.get(Opcode), DstReg)
.addReg(DstReg, RegState::Kill)
.addImm(Offset);
New->getOperand(3).setIsDead();
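
The II++ before MI.eraseFromParent() in foldFrameOffset (and the matching
skip in eliminateFrameIndex) is the usual guard against erasing the node an
iterator still points at: advance the iterator first, then delete the old
element. The same pattern on std::list, which has the equivalent
invalidation rule to the machine instruction list:

  #include <cassert>
  #include <iterator>
  #include <list>

  int main() {
    std::list<int> L = {1, 2, 3};
    auto It = std::next(L.begin()); // Points at 2 (the foldable add).
    auto Doomed = It++;             // It moves on to 3, Doomed stays at 2.
    L.erase(Doomed);                // Safe: It was advanced off 2 first.
    assert(*It == 3 && L.size() == 2);
    return 0;
  }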
diff --git a/lib/Target/AVR/AVRRegisterInfo.td b/lib/Target/AVR/AVRRegisterInfo.td
index 32650fc66751e..8162f12052be5 100644
--- a/lib/Target/AVR/AVRRegisterInfo.td
+++ b/lib/Target/AVR/AVRRegisterInfo.td
@@ -110,8 +110,6 @@ CoveredBySubRegs = 1 in
// Register Classes
//===----------------------------------------------------------------------===//
-//:TODO: use proper set instructions instead of using always "add"
-
// Main 8-bit register class.
def GPR8 : RegisterClass<"AVR", [i8], 8,
(
@@ -199,14 +197,11 @@ def PTRDISPREGS : RegisterClass<"AVR", [i16], 8,
// We have a bunch of instructions with an explicit Z register argument. We
// model this using a register class containing only the Z register.
-// :TODO: Rename to 'ZREG'.
-def ZREGS : RegisterClass<"AVR", [i16], 8, (add R31R30)>;
+def ZREG : RegisterClass<"AVR", [i16], 8, (add R31R30)>;
// Register class used for the stack read pseudo instruction.
def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>;
-//:TODO: if we remove this we get an error in tablegen
-//:TODO: this is just a hack, remove it once add16 works!
// Status register.
def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>;
def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)>
diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp
index 91d2a8737b870..a9d61ffc952c3 100644
--- a/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/lib/Target/AVR/AVRTargetMachine.cpp
@@ -66,6 +66,7 @@ public:
bool addInstSelector() override;
void addPreSched2() override;
+ void addPreEmitPass() override;
void addPreRegAlloc() override;
};
} // namespace
@@ -115,4 +116,9 @@ void AVRPassConfig::addPreSched2() {
addPass(createAVRExpandPseudoPass());
}
+void AVRPassConfig::addPreEmitPass() {
+ // Must run branch relaxation immediately before the asm printer.
+ addPass(&BranchRelaxationPassID);
+}
+
} // end of namespace llvm
diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index cf52e552978f1..5004736365c7b 100644
--- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -466,6 +466,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands) {
if (!tryParseRegisterOperand(Operands)) {
return false;
}
+ LLVM_FALLTHROUGH;
case AsmToken::LParen:
case AsmToken::Integer:
case AsmToken::Dot:
diff --git a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp b/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp
index 316b7836df0d7..0f34b8e18ff96 100644
--- a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp
+++ b/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp
@@ -106,7 +106,7 @@ void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
bool isPtrReg = (MOI.RegClass == AVR::PTRREGSRegClassID) ||
(MOI.RegClass == AVR::PTRDISPREGSRegClassID) ||
- (MOI.RegClass == AVR::ZREGSRegClassID);
+ (MOI.RegClass == AVR::ZREGRegClassID);
if (isPtrReg) {
O << getRegisterName(Op.getReg(), AVR::ptr);
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
index 1e61eccf775f5..6d126ed622aa1 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
@@ -33,7 +33,7 @@ static unsigned getEFlagsForFeatureSet(const FeatureBitset &Features) {
EFlags |= ELF::EF_AVR_ARCH_AVR51;
else if (Features[AVR::ELFArchAVR6])
EFlags |= ELF::EF_AVR_ARCH_AVR6;
- else if (Features[AVR::ELFArchAVRTiny])
+ else if (Features[AVR::ELFArchTiny])
EFlags |= ELF::EF_AVR_ARCH_AVRTINY;
else if (Features[AVR::ELFArchXMEGA1])
EFlags |= ELF::EF_AVR_ARCH_XMEGA1;
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 15e89fb2a2611..9fc812cdef14f 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -29,7 +29,7 @@ public:
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsResolved) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -65,7 +65,7 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
- bool IsPCRel) const {
+ bool IsResolved) const {
if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
assert(Value == 0);
} else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) {
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index c19e636d79ca2..d901abbd16925 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -1413,6 +1413,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
// Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) "
case Hexagon::CONST32:
is32bit = true;
+ LLVM_FALLTHROUGH;
// Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) "
case Hexagon::CONST64:
// FIXME: need better way to detect AsmStreamer (upstream removed getKind())
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 14c682c6df4bc..b064778c4bbd3 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1947,8 +1947,10 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
switch (Opc) {
case Hexagon::S2_storeri_io:
Align++;
+ LLVM_FALLTHROUGH;
case Hexagon::S2_storerh_io:
Align++;
+ LLVM_FALLTHROUGH;
case Hexagon::S2_storerb_io:
break;
default:
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index 730026121d3be..3de5310882409 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -937,6 +937,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
case Hexagon::J2_jumpfnew:
case Hexagon::J2_jumpfnewpt:
Negated = true;
+ LLVM_FALLTHROUGH;
case Hexagon::J2_jumpt:
case Hexagon::J2_jumptpt:
case Hexagon::J2_jumptnew:
diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp
index aa68f6cfdfc11..49ddd6961f8a9 100644
--- a/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -2244,6 +2244,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &BrI,
case Hexagon::J2_jumpfnew:
case Hexagon::J2_jumpfnewpt:
Negated = true;
+ LLVM_FALLTHROUGH;
case Hexagon::J2_jumpt:
case Hexagon::J2_jumptnew:
case Hexagon::J2_jumptnewpt:
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 97a53dcbaed72..c790579ccebc0 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -979,18 +979,6 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
if (MFI.hasCalls() || HMFI.hasClobberLR())
return true;
- // Frame pointer elimination is a possiblility at this point, but
- // to know if FP is necessary we need to know if spill/restore
- // functions will be used (they require FP to be valid).
- // This means that hasFP shouldn't really be called before CSI is
- // calculated, and some measures are taken to make sure of that
- // (e.g. default implementations of virtual functions that call it
- // are overridden apropriately).
- assert(MFI.isCalleeSavedInfoValid() && "Need to know CSI");
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (useSpillFunction(MF, CSI) || useRestoreFunction(MF, CSI))
- return true;
-
return false;
}
@@ -2437,6 +2425,8 @@ bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
const CSIVect &CSI) const {
if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
return true;
+ if (!hasFP(MF))
+ return true;
if (!isOptSize(MF) && !isMinSize(MF))
if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
return true;
diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp
index f14c733dcf511..3470480d607dc 100644
--- a/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -334,6 +334,7 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) {
if (MRI->getRegClass(PR.R) != PredRC)
return false;
// If it is a copy between two predicate registers, fall through.
+ LLVM_FALLTHROUGH;
}
case Hexagon::C2_and:
case Hexagon::C2_andn:
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index e5f49ca77a912..0163b2e2bdc46 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -241,22 +241,31 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
case MVT::v32i16:
case MVT::v16i32:
case MVT::v8i64:
- if (isAlignedMemNode(LD))
- Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
- else
+ if (isAlignedMemNode(LD)) {
+ if (LD->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
+ }
break;
// 128B
case MVT::v128i8:
case MVT::v64i16:
case MVT::v32i32:
case MVT::v16i64:
- if (isAlignedMemNode(LD))
- Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B
- : Hexagon::V6_vL32b_ai_128B;
- else
+ if (isAlignedMemNode(LD)) {
+ if (LD->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi_128B
+ : Hexagon::V6_vL32b_nt_ai_128B;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B
+ : Hexagon::V6_vL32b_ai_128B;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi_128B
: Hexagon::V6_vL32Ub_ai_128B;
+ }
break;
default:
llvm_unreachable("Unexpected memory type in indexed load");
@@ -529,22 +538,31 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
case MVT::v32i16:
case MVT::v16i32:
case MVT::v8i64:
- if (isAlignedMemNode(ST))
- Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
- else
+ if (isAlignedMemNode(ST)) {
+ if (ST->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
+ }
break;
// 128B
case MVT::v128i8:
case MVT::v64i16:
case MVT::v32i32:
case MVT::v16i64:
- if (isAlignedMemNode(ST))
- Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B
- : Hexagon::V6_vS32b_ai_128B;
- else
+ if (isAlignedMemNode(ST)) {
+ if (ST->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi_128B
+ : Hexagon::V6_vS32b_nt_ai_128B;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B
+ : Hexagon::V6_vS32b_ai_128B;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi_128B
: Hexagon::V6_vS32Ub_ai_128B;
+ }
break;
default:
llvm_unreachable("Unexpected memory type in indexed store");
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 2daacf7955559..67242764d4531 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -716,6 +716,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
auto PtrVT = getPointerTy(MF.getDataLayout());
// Check for varargs.
@@ -832,7 +833,6 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (NeedsArgAlign && Subtarget.hasV60TOps()) {
DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// V6 vectors passed by value have 64 or 128 byte alignment depending
// on whether we are 64 byte vector mode or 128 byte.
bool UseHVXDbl = Subtarget.useHVXDblOps();
@@ -916,10 +916,15 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(Glue);
if (IsTailCall) {
- MF.getFrameInfo().setHasTailCall();
+ MFI.setHasTailCall();
return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
}
+ // Set this here because we need to know it for "hasFP" in frame lowering.
+ // The target-independent code calls getFrameRegister before this flag would
+ // otherwise be set, and getFrameRegister uses hasFP to choose between the
+ // frame pointer and the stack pointer.
+ MFI.setHasCalls(true);
+
unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
Glue = Chain.getValue(1);
@@ -1284,11 +1289,9 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
// Creates a SPLAT instruction for a constant value VAL.
static SDValue createSplat(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
SDValue Val) {
- if (VT.getSimpleVT() == MVT::v4i8)
- return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val);
-
- if (VT.getSimpleVT() == MVT::v4i16)
- return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val);
+ EVT T = VT.getVectorElementType();
+ if (T == MVT::i8 || T == MVT::i16)
+ return DAG.getNode(HexagonISD::VSPLAT, dl, VT, Val);
return SDValue();
}
@@ -2296,32 +2299,13 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::JT: return "HexagonISD::JT";
case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
- case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
- case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH";
- case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB";
- case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH";
case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
- case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ";
- case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT";
- case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU";
- case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ";
- case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT";
- case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU";
- case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ";
- case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT";
- case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU";
case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE";
case HexagonISD::VPACK: return "HexagonISD::VPACK";
- case HexagonISD::VSHLH: return "HexagonISD::VSHLH";
- case HexagonISD::VSHLW: return "HexagonISD::VSHLW";
- case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB";
- case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH";
- case HexagonISD::VSRAH: return "HexagonISD::VSRAH";
- case HexagonISD::VSRAW: return "HexagonISD::VSRAW";
- case HexagonISD::VSRLH: return "HexagonISD::VSRLH";
- case HexagonISD::VSRLW: return "HexagonISD::VSRLW";
- case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH";
- case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW";
+ case HexagonISD::VASL: return "HexagonISD::VASL";
+ case HexagonISD::VASR: return "HexagonISD::VASR";
+ case HexagonISD::VLSR: return "HexagonISD::VLSR";
+ case HexagonISD::VSPLAT: return "HexagonISD::VSPLAT";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::OP_END: break;
}
@@ -2503,13 +2487,13 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
if (VT.getSimpleVT() == MVT::v4i16) {
switch (Op.getOpcode()) {
case ISD::SRA:
- Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat);
+ Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat);
break;
case ISD::SHL:
- Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat);
+ Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat);
break;
case ISD::SRL:
- Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat);
+ Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat);
break;
default:
return SDValue();
@@ -2517,13 +2501,13 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
} else if (VT.getSimpleVT() == MVT::v2i32) {
switch (Op.getOpcode()) {
case ISD::SRA:
- Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat);
+ Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat);
break;
case ISD::SHL:
- Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat);
+ Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat);
break;
case ISD::SRL:
- Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat);
+ Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat);
break;
default:
return SDValue();
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 1415156487c07..bfd2c94eeabaa 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -52,29 +52,10 @@ namespace HexagonISD {
COMBINE,
PACKHL,
- VSPLATB,
- VSPLATH,
- SHUFFEB,
- SHUFFEH,
- SHUFFOB,
- SHUFFOH,
- VSXTBH,
- VSXTBW,
- VSRAW,
- VSRAH,
- VSRLW,
- VSRLH,
- VSHLW,
- VSHLH,
- VCMPBEQ,
- VCMPBGT,
- VCMPBGTU,
- VCMPHEQ,
- VCMPHGT,
- VCMPHGTU,
- VCMPWEQ,
- VCMPWGT,
- VCMPWGTU,
+ VSPLAT,
+ VASL,
+ VASR,
+ VLSR,
INSERT,
INSERTRP,
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 1eac2d3dd8e22..c77c669f4ca75 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -250,15 +250,19 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case Hexagon::L2_loadri_io:
case Hexagon::L2_loadrd_io:
case Hexagon::V6_vL32b_ai:
+ case Hexagon::V6_vL32b_nt_ai:
case Hexagon::V6_vL32b_ai_128B:
+ case Hexagon::V6_vL32b_nt_ai_128B:
case Hexagon::V6_vL32Ub_ai:
case Hexagon::V6_vL32Ub_ai_128B:
case Hexagon::LDriw_pred:
case Hexagon::LDriw_mod:
case Hexagon::PS_vloadrq_ai:
case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrw_nt_ai:
case Hexagon::PS_vloadrq_ai_128B:
- case Hexagon::PS_vloadrw_ai_128B: {
+ case Hexagon::PS_vloadrw_ai_128B:
+ case Hexagon::PS_vloadrw_nt_ai_128B: {
const MachineOperand OpFI = MI.getOperand(1);
if (!OpFI.isFI())
return 0;
@@ -1726,6 +1730,39 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI,
return false;
}
+std::pair<unsigned, unsigned>
+HexagonInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ return std::make_pair(TF & ~HexagonII::MO_Bitmasks,
+ TF & HexagonII::MO_Bitmasks);
+}
+
+ArrayRef<std::pair<unsigned, const char*>>
+HexagonInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace HexagonII;
+ static const std::pair<unsigned, const char*> Flags[] = {
+ {MO_PCREL, "hexagon-pcrel"},
+ {MO_GOT, "hexagon-got"},
+ {MO_LO16, "hexagon-lo16"},
+ {MO_HI16, "hexagon-hi16"},
+ {MO_GPREL, "hexagon-gprel"},
+ {MO_GDGOT, "hexagon-gdgot"},
+ {MO_GDPLT, "hexagon-gdplt"},
+ {MO_IE, "hexagon-ie"},
+ {MO_IEGOT, "hexagon-iegot"},
+ {MO_TPREL, "hexagon-tprel"}
+ };
+ return makeArrayRef(Flags);
+}
+
+ArrayRef<std::pair<unsigned, const char*>>
+HexagonInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace HexagonII;
+ static const std::pair<unsigned, const char*> Flags[] = {
+ {HMOTF_ConstExtended, "hexagon-ext"}
+ };
+ return makeArrayRef(Flags);
+}
+
unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const {
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *TRC;
@@ -1797,7 +1834,7 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const {
const MachineOperand &MO = MI.getOperand(ExtOpNum);
// Use MO operand flags to determine if MO
// has the HMOTF_ConstExtended flag set.
- if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended)
+ if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
return true;
// If this is a Machine BB address we are talking about, and it is
// not marked as extended, say so.
@@ -1807,9 +1844,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const {
// We could be using an instruction with an extendable immediate and shoehorn
// a global address into it. If it is a global address it will be constant
// extended. We do this for COMBINE.
- // We currently only handle isGlobal() because it is the only kind of
- // object we are going to end up with here for now.
- // In the future we probably should add isSymbol(), etc.
if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
MO.isJTI() || MO.isCPI() || MO.isFPImm())
return true;
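
The one-character change from && to & above is a real bug fix: with a
nonzero constant on the right, Flags && HMOTF_ConstExtended is just
"Flags != 0", so any operand with any target flag set looked const-extended.
Bitwise & tests the actual bit. A sketch with a made-up bit value:

  #include <cassert>

  int main() {
    unsigned Flags = 0x1;          // Some unrelated flag is set.
    unsigned ConstExtended = 0x40; // Made-up stand-in for the real encoding.
    assert((Flags && ConstExtended) == true); // Logical AND: wrongly true.
    assert((Flags & ConstExtended) == 0);     // Bitwise AND: correctly false.
    return 0;
  }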
@@ -1961,11 +1995,9 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const {
return true;
// Use MO operand flags to determine if one of MI's operands
// has HMOTF_ConstExtended flag set.
- for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
- E = MI.operands_end(); I != E; ++I) {
- if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended)
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
return true;
- }
return false;
}
@@ -2445,20 +2477,28 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
switch (Opcode) {
case Hexagon::PS_vstorerq_ai:
case Hexagon::PS_vstorerw_ai:
+ case Hexagon::PS_vstorerw_nt_ai:
case Hexagon::PS_vloadrq_ai:
case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrw_nt_ai:
case Hexagon::V6_vL32b_ai:
case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vL32b_nt_ai:
+ case Hexagon::V6_vS32b_nt_ai:
case Hexagon::V6_vL32Ub_ai:
case Hexagon::V6_vS32Ub_ai:
return isShiftedInt<4,6>(Offset);
case Hexagon::PS_vstorerq_ai_128B:
case Hexagon::PS_vstorerw_ai_128B:
+ case Hexagon::PS_vstorerw_nt_ai_128B:
case Hexagon::PS_vloadrq_ai_128B:
case Hexagon::PS_vloadrw_ai_128B:
+ case Hexagon::PS_vloadrw_nt_ai_128B:
case Hexagon::V6_vL32b_ai_128B:
case Hexagon::V6_vS32b_ai_128B:
+ case Hexagon::V6_vL32b_nt_ai_128B:
+ case Hexagon::V6_vS32b_nt_ai_128B:
case Hexagon::V6_vL32Ub_ai_128B:
case Hexagon::V6_vS32Ub_ai_128B:
return isShiftedInt<4,7>(Offset);
@@ -3170,11 +3210,19 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
return Hexagon::V6_vL32b_cur_pi;
case Hexagon::V6_vL32b_ai:
return Hexagon::V6_vL32b_cur_ai;
+ case Hexagon::V6_vL32b_nt_pi:
+ return Hexagon::V6_vL32b_nt_cur_pi;
+ case Hexagon::V6_vL32b_nt_ai:
+ return Hexagon::V6_vL32b_nt_cur_ai;
//128B
case Hexagon::V6_vL32b_pi_128B:
return Hexagon::V6_vL32b_cur_pi_128B;
case Hexagon::V6_vL32b_ai_128B:
return Hexagon::V6_vL32b_cur_ai_128B;
+ case Hexagon::V6_vL32b_nt_pi_128B:
+ return Hexagon::V6_vL32b_nt_cur_pi_128B;
+ case Hexagon::V6_vL32b_nt_ai_128B:
+ return Hexagon::V6_vL32b_nt_cur_ai_128B;
}
return 0;
}
@@ -3187,11 +3235,19 @@ int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const {
return Hexagon::V6_vL32b_pi;
case Hexagon::V6_vL32b_cur_ai:
return Hexagon::V6_vL32b_ai;
+ case Hexagon::V6_vL32b_nt_cur_pi:
+ return Hexagon::V6_vL32b_nt_pi;
+ case Hexagon::V6_vL32b_nt_cur_ai:
+ return Hexagon::V6_vL32b_nt_ai;
//128B
case Hexagon::V6_vL32b_cur_pi_128B:
return Hexagon::V6_vL32b_pi_128B;
case Hexagon::V6_vL32b_cur_ai_128B:
return Hexagon::V6_vL32b_ai_128B;
+ case Hexagon::V6_vL32b_nt_cur_pi_128B:
+ return Hexagon::V6_vL32b_nt_pi_128B;
+ case Hexagon::V6_vL32b_nt_cur_ai_128B:
+ return Hexagon::V6_vL32b_nt_ai_128B;
}
return 0;
}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 944d0161a7c8e..0436ce3ac475b 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -301,6 +301,27 @@ public:
const MachineInstr &UseMI,
unsigned UseIdx) const override;
+ /// Decompose the machine operand's target flags into two values - the direct
+ /// target flag value and any of bit flags that are applied.
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+ /// Return an array that contains the direct target flag values and their
+ /// names.
+ ///
+ /// MIR Serialization is able to serialize only the target flags that are
+ /// defined by this method.
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+
+ /// Return an array that contains the bitmask target flag values and their
+ /// names.
+ ///
+ /// MIR Serialization is able to serialize only the target flags that are
+ /// defined by this method.
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
+
bool isTailCall(const MachineInstr &MI) const override;
/// HexagonInstrInfo specifics.
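
The decomposition contract documented above splits a target-flag word into
one direct value plus any independently OR'ed bitmask flags, which is what
lets MIR print e.g. "hexagon-hi16" and "hexagon-ext" on the same operand.
A sketch with made-up flag encodings (the real values live in HexagonII):

  #include <cassert>
  #include <utility>

  enum : unsigned { MO_LO16 = 1, MO_HI16 = 2, HMOTF_ConstExtended = 0x40 };
  constexpr unsigned MO_Bitmasks = HMOTF_ConstExtended;

  static std::pair<unsigned, unsigned> decompose(unsigned TF) {
    return std::make_pair(TF & ~MO_Bitmasks, TF & MO_Bitmasks);
  }

  int main() {
    auto P = decompose(MO_HI16 | HMOTF_ConstExtended);
    assert(P.first == MO_HI16 && P.second == HMOTF_ConstExtended);
    return 0;
  }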
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 4602de979024a..1a26805d190d0 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -49,7 +49,7 @@ static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
namespace {
class HexagonCallMutation : public ScheduleDAGMutation {
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index 689419638f546..ba98b8994937f 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -2770,6 +2770,9 @@ def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [
multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Aligned stores
+ def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32b_nt_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
(V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
Requires<[UseHVXSgl]>;
@@ -2778,6 +2781,9 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
Requires<[UseHVXSgl]>;
// 128B Aligned stores
+ def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32b_nt_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
(V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
Requires<[UseHVXDbl]>;
@@ -2787,6 +2793,11 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Fold Add R+OFF into vector store.
let AddedComplexity = 10 in {
+ def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, Iss4_6:$offset)),
+ (V6_vS32b_nt_ai IntRegs:$src2, Iss4_6:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
(add IntRegs:$src2, Iss4_6:$offset)),
(V6_vS32b_ai IntRegs:$src2, Iss4_6:$offset,
@@ -2799,6 +2810,11 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
Requires<[UseHVXSgl]>;
// Fold Add R+OFF into vector store 128B.
+ def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, Iss4_7:$offset)),
+ (V6_vS32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
(add IntRegs:$src2, Iss4_7:$offset)),
(V6_vS32b_ai_128B IntRegs:$src2, Iss4_7:$offset,
@@ -2820,6 +2836,9 @@ defm : vS32b_ai_pats <v8i64, v16i64>;
multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Aligned loads
+ def : Pat < (VTSgl (alignednontemporalload IntRegs:$addr)),
+ (V6_vL32b_nt_ai IntRegs:$addr, 0) >,
+ Requires<[UseHVXSgl]>;
def : Pat < (VTSgl (alignedload IntRegs:$addr)),
(V6_vL32b_ai IntRegs:$addr, 0) >,
Requires<[UseHVXSgl]>;
@@ -2828,6 +2847,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
Requires<[UseHVXSgl]>;
// 128B Load
+ def : Pat < (VTDbl (alignednontemporalload IntRegs:$addr)),
+ (V6_vL32b_nt_ai_128B IntRegs:$addr, 0) >,
+ Requires<[UseHVXDbl]>;
def : Pat < (VTDbl (alignedload IntRegs:$addr)),
(V6_vL32b_ai_128B IntRegs:$addr, 0) >,
Requires<[UseHVXDbl]>;
@@ -2837,6 +2859,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Fold Add R+OFF into vector load.
let AddedComplexity = 10 in {
+ def : Pat<(VTDbl (alignednontemporalload (add IntRegs:$src2, Iss4_7:$offset))),
+ (V6_vL32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
+ Requires<[UseHVXDbl]>;
def : Pat<(VTDbl (alignedload (add IntRegs:$src2, Iss4_7:$offset))),
(V6_vL32b_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
@@ -2844,6 +2869,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
(V6_vL32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
+ def : Pat<(VTSgl (alignednontemporalload (add IntRegs:$src2, Iss4_6:$offset))),
+ (V6_vL32b_nt_ai IntRegs:$src2, Iss4_6:$offset)>,
+ Requires<[UseHVXSgl]>;
def : Pat<(VTSgl (alignedload (add IntRegs:$src2, Iss4_6:$offset))),
(V6_vL32b_ai IntRegs:$src2, Iss4_6:$offset)>,
Requires<[UseHVXSgl]>;
@@ -2859,6 +2887,9 @@ defm : vL32b_ai_pats <v16i32, v32i32>;
defm : vL32b_ai_pats <v8i64, v16i64>;
multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(alignednontemporalstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (PS_vstorerw_nt_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
(PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
Requires<[UseHVXSgl]>;
@@ -2866,6 +2897,10 @@ multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
(PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
Requires<[UseHVXSgl]>;
+ def : Pat<(alignednontemporalstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (PS_vstorerw_nt_ai_128B IntRegs:$addr, 0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
(PS_vstorerw_ai_128B IntRegs:$addr, 0,
(VTDbl VecDblRegs128B:$src1))>,
@@ -2882,6 +2917,9 @@ defm : STrivv_pats <v32i32, v64i32>;
defm : STrivv_pats <v16i64, v32i64>;
multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(VTSgl (alignednontemporalload I32:$addr)),
+ (PS_vloadrw_nt_ai I32:$addr, 0)>,
+ Requires<[UseHVXSgl]>;
def : Pat<(VTSgl (alignedload I32:$addr)),
(PS_vloadrw_ai I32:$addr, 0)>,
Requires<[UseHVXSgl]>;
@@ -2889,6 +2927,9 @@ multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
(PS_vloadrwu_ai I32:$addr, 0)>,
Requires<[UseHVXSgl]>;
+ def : Pat<(VTDbl (alignednontemporalload I32:$addr)),
+ (PS_vloadrw_nt_ai_128B I32:$addr, 0)>,
+ Requires<[UseHVXDbl]>;
def : Pat<(VTDbl (alignedload I32:$addr)),
(PS_vloadrw_ai_128B I32:$addr, 0)>,
Requires<[UseHVXDbl]>;
@@ -3021,16 +3062,16 @@ def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
(A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
-def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
-def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
+def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>;
// Replicate the low 8-bits from 32-bits input register into each of the
// four bytes of 32-bits destination register.
-def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
// Replicate the low 16-bits from 32-bits input register into each of the
// four halfwords of 64-bits destination register.
-def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
+def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
@@ -3068,84 +3109,44 @@ def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
(i32 u5_0ImmPred:$c))))),
(S2_asl_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))),
(S2_asr_i_vh V4I16:$b, imm:$c)>;
-def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))),
(S2_lsr_i_vh V4I16:$b, imm:$c)>;
-def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))),
(S2_asl_i_vh V4I16:$b, imm:$c)>;
-def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
-def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+def SDTHexagonVShift
+ : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>;
-def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
-def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
-def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
-def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
-def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
-def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>;
+def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>;
+def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>;
-def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)),
+def: Pat<(v2i32 (HexagonVASL V2I32:$Rs, u5_0ImmPred:$u5)),
+ (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVASL V4I16:$Rs, u4_0ImmPred:$u4)),
+ (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVASR V2I32:$Rs, u5_0ImmPred:$u5)),
(S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
-def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)),
+def: Pat<(v4i16 (HexagonVASR V4I16:$Rs, u4_0ImmPred:$u4)),
(S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
-def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)),
+def: Pat<(v2i32 (HexagonVLSR V2I32:$Rs, u5_0ImmPred:$u5)),
(S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
-def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)),
+def: Pat<(v4i16 (HexagonVLSR V4I16:$Rs, u4_0ImmPred:$u4)),
(S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
-def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)),
- (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
-def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)),
- (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
: Pat <(Op Value:$Rs, I32:$Rt),
(MI Value:$Rs, I32:$Rt)>;
-def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
-def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
-def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
-def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
-def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
-def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
-
-
-def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
- [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
-def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
- [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
-def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
- [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
-
-def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
-def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
-def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
-def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
-def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
-def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
-def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
-def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
-def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
-
-
-class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
- : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
- (MI Value:$Rs, Value:$Rt)>;
-
-def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
-def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
-def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
-
-def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
-def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
-def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
-
-def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
-def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
-def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
+def: vshift_rr_pat <S2_asl_r_vw, HexagonVASL, V2I32>;
+def: vshift_rr_pat <S2_asl_r_vh, HexagonVASL, V4I16>;
+def: vshift_rr_pat <S2_asr_r_vw, HexagonVASR, V2I32>;
+def: vshift_rr_pat <S2_asr_r_vh, HexagonVASR, V4I16>;
+def: vshift_rr_pat <S2_lsr_r_vw, HexagonVLSR, V2I32>;
+def: vshift_rr_pat <S2_lsr_r_vh, HexagonVLSR, V4I16>;
class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
@@ -3255,13 +3256,6 @@ def: Pat<(v4i8 (trunc V4I16:$Rs)),
def: Pat<(v2i16 (trunc V2I32:$Rs)),
(LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
-
-def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
-def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
-
-def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
-def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
-
def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
@@ -3322,31 +3316,6 @@ def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
(A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
(S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
-def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
-
-def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
-def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
-def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
-def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
-
-class ShufflePat<InstHexagon MI, SDNode Op>
- : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
- (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
-
-// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
-def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
-
-// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
-def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
-
-// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
-def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
-
-// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
-def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
-
-
// Truncated store from v4i16 to v4i8.
def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr),
diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td
index 93fb688fc1c0a..b42c1ab975a80 100644
--- a/lib/Target/Hexagon/HexagonPseudo.td
+++ b/lib/Target/Hexagon/HexagonPseudo.td
@@ -407,6 +407,11 @@ def PS_vstorerw_ai: STrivv_template<VecDblRegs, V6_vS32b_ai>,
def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32b_ai_128B>,
Requires<[HasV60T,UseHVXDbl]>;
+def PS_vstorerw_nt_ai: STrivv_template<VecDblRegs, V6_vS32b_nt_ai>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerw_nt_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32b_nt_ai_128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
def PS_vstorerwu_ai: STrivv_template<VecDblRegs, V6_vS32Ub_ai>,
Requires<[HasV60T,UseHVXSgl]>;
def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32Ub_ai_128B>,
@@ -433,6 +438,11 @@ def PS_vloadrw_ai: LDrivv_template<VecDblRegs, V6_vL32b_ai>,
def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32b_ai_128B>,
Requires<[HasV60T,UseHVXDbl]>;
+def PS_vloadrw_nt_ai: LDrivv_template<VecDblRegs, V6_vL32b_nt_ai>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrw_nt_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32b_nt_ai_128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
def PS_vloadrwu_ai: LDrivv_template<VecDblRegs, V6_vL32Ub_ai>,
Requires<[HasV60T,UseHVXSgl]>;
def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32Ub_ai_128B>,
diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp
index db268b78cd73f..4fa929a20810a 100644
--- a/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -350,6 +350,8 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
MI->getOperand(2).getImm());
case Hexagon::A4_combineri:
ImmX++;
+ // Fall through into A4_combineir.
+ LLVM_FALLTHROUGH;
case Hexagon::A4_combineir: {
ImmX++;
int64_t V = MI->getOperand(ImmX).getImm();
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 76d9b31b005ff..7d88b51f32dd3 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -110,10 +110,11 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
namespace llvm {
extern char &HexagonExpandCondsetsID;
void initializeHexagonExpandCondsetsPass(PassRegistry&);
- void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
void initializeHexagonGenMuxPass(PassRegistry&);
- void initializeHexagonOptAddrModePass(PassRegistry&);
+ void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
void initializeHexagonNewValueJumpPass(PassRegistry&);
+ void initializeHexagonOptAddrModePass(PassRegistry&);
+ void initializeHexagonPacketizerPass(PassRegistry&);
Pass *createHexagonLoopIdiomPass();
FunctionPass *createHexagonBitSimplify();
@@ -156,10 +157,11 @@ extern "C" void LLVMInitializeHexagonTarget() {
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
- initializeHexagonLoopIdiomRecognizePass(PR);
initializeHexagonGenMuxPass(PR);
- initializeHexagonOptAddrModePass(PR);
+ initializeHexagonLoopIdiomRecognizePass(PR);
initializeHexagonNewValueJumpPass(PR);
+ initializeHexagonOptAddrModePass(PR);
+ initializeHexagonPacketizerPass(PR);
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 7667bfb7a0eb4..a3021e3dfe432 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -60,9 +60,7 @@ namespace {
class HexagonPacketizer : public MachineFunctionPass {
public:
static char ID;
- HexagonPacketizer() : MachineFunctionPass(ID) {
- initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
- }
+ HexagonPacketizer() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -89,14 +87,14 @@ namespace {
char HexagonPacketizer::ID = 0;
}
-INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
- false, false)
+INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer",
+ "Hexagon Packetizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
- false, false)
+INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
+ "Hexagon Packetizer", false, false)
HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
MachineLoopInfo &MLI, AliasAnalysis *AA,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 34d0b55aa22ae..2a0edda8dcee8 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -412,7 +412,7 @@ public:
/// fixup kind as appropriate.
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t FixupValue, bool IsPCRel) const override {
+ uint64_t FixupValue, bool IsResolved) const override {
// When FixupValue is 0 the relocation is external and there
// is nothing for us to do.
@@ -442,6 +442,7 @@ public:
case fixup_Hexagon_B7_PCREL:
if (!(isIntN(7, sValue)))
HandleFixupError(7, 2, (int64_t)FixupValue, "B7_PCREL");
+ LLVM_FALLTHROUGH;
case fixup_Hexagon_B7_PCREL_X:
InstMask = 0x00001f18; // Word32_B7
Reloc = (((Value >> 2) & 0x1f) << 8) | // Value 6-2 = Target 12-8
@@ -451,6 +452,7 @@ public:
case fixup_Hexagon_B9_PCREL:
if (!(isIntN(9, sValue)))
HandleFixupError(9, 2, (int64_t)FixupValue, "B9_PCREL");
+ LLVM_FALLTHROUGH;
case fixup_Hexagon_B9_PCREL_X:
InstMask = 0x003000fe; // Word32_B9
Reloc = (((Value >> 7) & 0x3) << 20) | // Value 8-7 = Target 21-20
@@ -462,6 +464,7 @@ public:
case fixup_Hexagon_B13_PCREL:
if (!(isIntN(13, sValue)))
HandleFixupError(13, 2, (int64_t)FixupValue, "B13_PCREL");
+ LLVM_FALLTHROUGH;
case fixup_Hexagon_B13_PCREL_X:
InstMask = 0x00202ffe; // Word32_B13
Reloc = (((Value >> 12) & 0x1) << 21) | // Value 12 = Target 21
@@ -472,6 +475,7 @@ public:
case fixup_Hexagon_B15_PCREL:
if (!(isIntN(15, sValue)))
HandleFixupError(15, 2, (int64_t)FixupValue, "B15_PCREL");
+ LLVM_FALLTHROUGH;
case fixup_Hexagon_B15_PCREL_X:
InstMask = 0x00df20fe; // Word32_B15
Reloc = (((Value >> 13) & 0x3) << 22) | // Value 14-13 = Target 23-22
@@ -483,6 +487,7 @@ public:
case fixup_Hexagon_B22_PCREL:
if (!(isIntN(22, sValue)))
HandleFixupError(22, 2, (int64_t)FixupValue, "B22_PCREL");
+ LLVM_FALLTHROUGH;
case fixup_Hexagon_B22_PCREL_X:
InstMask = 0x01ff3ffe; // Word32_B22
Reloc = (((Value >> 13) & 0x1ff) << 16) | // Value 21-13 = Target 24-16
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index d8009c5da08ee..7f90e83fc8e9e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -169,8 +169,11 @@ namespace HexagonII {
// Hexagon specific MO operand flag mask.
enum HexagonMOTargetFlagVal {
- //===------------------------------------------------------------------===//
- // Hexagon Specific MachineOperand flags.
+ // Hexagon-specific MachineOperand target flags.
+ //
+ // When changing these, make sure to update
+ // getSerializableDirectMachineOperandTargetFlags and
+ // getSerializableBitmaskMachineOperandTargetFlags if needed.
MO_NO_FLAG,
/// MO_PCREL - On a symbol operand, indicates a PC-relative relocation
@@ -207,10 +210,12 @@ namespace HexagonII {
MO_TPREL,
// HMOTF_ConstExtended
- // Addendum to abovem, indicates a const extended op
+ // Addendum to above, indicates a const extended op
// Can be used as a mask.
- HMOTF_ConstExtended = 0x80
+ HMOTF_ConstExtended = 0x80,
+ // Union of all bitmasks (currently only HMOTF_ConstExtended).
+ MO_Bitmasks = HMOTF_ConstExtended
};
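The MO_Bitmasks member exists so a combined flag word can be split mechanically; a hand-worked illustration (the values follow from the enum above, the variable names are ours):

    unsigned TF = HexagonII::MO_PCREL | HexagonII::HMOTF_ConstExtended; // 0x81
    unsigned Direct = TF & ~HexagonII::MO_Bitmasks; // == MO_PCREL (0x1)
    unsigned Masks  = TF &  HexagonII::MO_Bitmasks; // == HMOTF_ConstExtended (0x80)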
// Hexagon Sub-instruction classes.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 564d43b45cb87..1604e7c8dc549 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -259,6 +259,7 @@ bool HexagonShuffler::check() {
break;
case HexagonII::TypeCVI_VM_VP_LDU:
++onlyNo1;
+ LLVM_FALLTHROUGH;
case HexagonII::TypeCVI_VM_LD:
case HexagonII::TypeCVI_VM_TMP_LD:
case HexagonII::TypeLD:
@@ -274,6 +275,7 @@ bool HexagonShuffler::check() {
break;
case HexagonII::TypeCVI_VM_STU:
++onlyNo1;
+ LLVM_FALLTHROUGH;
case HexagonII::TypeCVI_VM_ST:
case HexagonII::TypeCVI_VM_NEW_ST:
case HexagonII::TypeST:
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 72e471f5766e5..1394ac7210f2f 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -787,6 +787,7 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseImmediate() {
case AsmToken::Dot:
if (!Parser.parseExpression(ExprVal))
return LanaiOperand::createImm(ExprVal, Start, End);
+ LLVM_FALLTHROUGH;
default:
return nullptr;
}
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index c212726113ab7..bbce5f670c99e 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -51,7 +51,7 @@ public:
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsResolved) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -92,7 +92,7 @@ bool LanaiAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void LanaiAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
- bool /*IsPCRel*/) const {
+ bool /*IsResolved*/) const {
MCFixupKind Kind = Fixup.getKind();
Value = adjustFixupValue(static_cast<unsigned>(Kind), Value);
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 69b1ba1528d04..b72c9d5344787 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -304,6 +304,9 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
+ bool expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
bool reportParseError(Twine ErrorMsg);
bool reportParseError(SMLoc Loc, Twine ErrorMsg);
@@ -343,6 +346,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetPushDirective();
bool parseSetSoftFloatDirective();
bool parseSetHardFloatDirective();
+ bool parseSetMtDirective();
+ bool parseSetNoMtDirective();
bool parseSetAssignment();
@@ -628,6 +633,9 @@ public:
bool useSoftFloat() const {
return getSTI().getFeatureBits()[Mips::FeatureSoftFloat];
}
+ bool hasMT() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMT];
+ }
/// Warn if RegIndex is the same as the current AT.
void warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc);
@@ -1966,6 +1974,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
case Mips::SDIV_MM:
FirstOp = 0;
SecondOp = 1;
+ LLVM_FALLTHROUGH;
case Mips::SDivMacro:
case Mips::DSDivMacro:
case Mips::UDivMacro:
@@ -2505,6 +2514,16 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::SEQIMacro:
return expandSeqI(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::MFTC0: case Mips::MTTC0:
+ case Mips::MFTGPR: case Mips::MTTGPR:
+ case Mips::MFTLO: case Mips::MTTLO:
+ case Mips::MFTHI: case Mips::MTTHI:
+ case Mips::MFTACX: case Mips::MTTACX:
+ case Mips::MFTDSP: case Mips::MTTDSP:
+ case Mips::MFTC1: case Mips::MTTC1:
+ case Mips::MFTHC1: case Mips::MTTHC1:
+ case Mips::CFTC1: case Mips::CTTC1:
+ return expandMXTRAlias(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
}
}
@@ -4876,6 +4895,212 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
+// Map the DSP accumulator and control register to the corresponding gpr
+// operand. Unlike the other aliases, the m(f|t)t(lo|hi|acx) instructions
+// do not map the DSP registers contiguously to gpr registers.
+static unsigned getRegisterForMxtrDSP(MCInst &Inst, bool IsMFDSP) {
+ switch (Inst.getOpcode()) {
+ case Mips::MFTLO:
+ case Mips::MTTLO:
+ switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) {
+ case Mips::AC0:
+ return Mips::ZERO;
+ case Mips::AC1:
+ return Mips::A0;
+ case Mips::AC2:
+ return Mips::T0;
+ case Mips::AC3:
+ return Mips::T4;
+ default:
+ llvm_unreachable("Unknown register for 'mttr' alias!");
+ }
+ case Mips::MFTHI:
+ case Mips::MTTHI:
+ switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) {
+ case Mips::AC0:
+ return Mips::AT;
+ case Mips::AC1:
+ return Mips::A1;
+ case Mips::AC2:
+ return Mips::T1;
+ case Mips::AC3:
+ return Mips::T5;
+ default:
+ llvm_unreachable("Unknown register for 'mttr' alias!");
+ }
+ case Mips::MFTACX:
+ case Mips::MTTACX:
+ switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) {
+ case Mips::AC0:
+ return Mips::V0;
+ case Mips::AC1:
+ return Mips::A2;
+ case Mips::AC2:
+ return Mips::T2;
+ case Mips::AC3:
+ return Mips::T6;
+ default:
+ llvm_unreachable("Unknown register for 'mttr' alias!");
+ }
+ case Mips::MFTDSP:
+ case Mips::MTTDSP:
+ return Mips::S0;
+ default:
+ llvm_unreachable("Unknown instruction for 'mttr' dsp alias!");
+ }
+}
+
+// Map the floating point register operand to the corresponding register
+// operand.
+static unsigned getRegisterForMxtrFP(MCInst &Inst, bool IsMFTC1) {
+ switch (Inst.getOperand(IsMFTC1 ? 1 : 0).getReg()) {
+ case Mips::F0: return Mips::ZERO;
+ case Mips::F1: return Mips::AT;
+ case Mips::F2: return Mips::V0;
+ case Mips::F3: return Mips::V1;
+ case Mips::F4: return Mips::A0;
+ case Mips::F5: return Mips::A1;
+ case Mips::F6: return Mips::A2;
+ case Mips::F7: return Mips::A3;
+ case Mips::F8: return Mips::T0;
+ case Mips::F9: return Mips::T1;
+ case Mips::F10: return Mips::T2;
+ case Mips::F11: return Mips::T3;
+ case Mips::F12: return Mips::T4;
+ case Mips::F13: return Mips::T5;
+ case Mips::F14: return Mips::T6;
+ case Mips::F15: return Mips::T7;
+ case Mips::F16: return Mips::S0;
+ case Mips::F17: return Mips::S1;
+ case Mips::F18: return Mips::S2;
+ case Mips::F19: return Mips::S3;
+ case Mips::F20: return Mips::S4;
+ case Mips::F21: return Mips::S5;
+ case Mips::F22: return Mips::S6;
+ case Mips::F23: return Mips::S7;
+ case Mips::F24: return Mips::T8;
+ case Mips::F25: return Mips::T9;
+ case Mips::F26: return Mips::K0;
+ case Mips::F27: return Mips::K1;
+ case Mips::F28: return Mips::GP;
+ case Mips::F29: return Mips::SP;
+ case Mips::F30: return Mips::FP;
+ case Mips::F31: return Mips::RA;
+ default: llvm_unreachable("Unknown register for mttc1 alias!");
+ }
+}
+
+// Map the coprocessor register operand to the corresponding gpr operand.
+static unsigned getRegisterForMxtrC0(MCInst &Inst, bool IsMFTC0) {
+ switch (Inst.getOperand(IsMFTC0 ? 1 : 0).getReg()) {
+ case Mips::COP00: return Mips::ZERO;
+ case Mips::COP01: return Mips::AT;
+ case Mips::COP02: return Mips::V0;
+ case Mips::COP03: return Mips::V1;
+ case Mips::COP04: return Mips::A0;
+ case Mips::COP05: return Mips::A1;
+ case Mips::COP06: return Mips::A2;
+ case Mips::COP07: return Mips::A3;
+ case Mips::COP08: return Mips::T0;
+ case Mips::COP09: return Mips::T1;
+ case Mips::COP010: return Mips::T2;
+ case Mips::COP011: return Mips::T3;
+ case Mips::COP012: return Mips::T4;
+ case Mips::COP013: return Mips::T5;
+ case Mips::COP014: return Mips::T6;
+ case Mips::COP015: return Mips::T7;
+ case Mips::COP016: return Mips::S0;
+ case Mips::COP017: return Mips::S1;
+ case Mips::COP018: return Mips::S2;
+ case Mips::COP019: return Mips::S3;
+ case Mips::COP020: return Mips::S4;
+ case Mips::COP021: return Mips::S5;
+ case Mips::COP022: return Mips::S6;
+ case Mips::COP023: return Mips::S7;
+ case Mips::COP024: return Mips::T8;
+ case Mips::COP025: return Mips::T9;
+ case Mips::COP026: return Mips::K0;
+ case Mips::COP027: return Mips::K1;
+ case Mips::COP028: return Mips::GP;
+ case Mips::COP029: return Mips::SP;
+ case Mips::COP030: return Mips::FP;
+ case Mips::COP031: return Mips::RA;
+ default: llvm_unreachable("Unknown register for mttc0 alias!");
+ }
+}
+
+/// Expand an alias of 'mftr' or 'mttr' into the full instruction by producing
+/// an mftr or mttr with the correctly mapped gpr register, u, sel and h bits.
+bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned rd = 0;
+ unsigned u = 1;
+ unsigned sel = 0;
+ unsigned h = 0;
+ bool IsMFTR = false;
+ switch (Inst.getOpcode()) {
+ case Mips::MFTC0:
+ IsMFTR = true;
+ LLVM_FALLTHROUGH;
+ case Mips::MTTC0:
+ u = 0;
+ rd = getRegisterForMxtrC0(Inst, IsMFTR);
+ sel = Inst.getOperand(2).getImm();
+ break;
+ case Mips::MFTGPR:
+ IsMFTR = true;
+ LLVM_FALLTHROUGH;
+ case Mips::MTTGPR:
+ rd = Inst.getOperand(IsMFTR ? 1 : 0).getReg();
+ break;
+ case Mips::MFTLO:
+ case Mips::MFTHI:
+ case Mips::MFTACX:
+ case Mips::MFTDSP:
+ IsMFTR = true;
+ LLVM_FALLTHROUGH;
+ case Mips::MTTLO:
+ case Mips::MTTHI:
+ case Mips::MTTACX:
+ case Mips::MTTDSP:
+ rd = getRegisterForMxtrDSP(Inst, IsMFTR);
+ sel = 1;
+ break;
+ case Mips::MFTHC1:
+ h = 1;
+ LLVM_FALLTHROUGH;
+ case Mips::MFTC1:
+ IsMFTR = true;
+ rd = getRegisterForMxtrFP(Inst, IsMFTR);
+ sel = 2;
+ break;
+ case Mips::MTTHC1:
+ h = 1;
+ LLVM_FALLTHROUGH;
+ case Mips::MTTC1:
+ rd = getRegisterForMxtrFP(Inst, IsMFTR);
+ sel = 2;
+ break;
+ case Mips::CFTC1:
+ IsMFTR = true;
+ LLVM_FALLTHROUGH;
+ case Mips::CTTC1:
+ rd = getRegisterForMxtrFP(Inst, IsMFTR);
+ sel = 3;
+ break;
+ }
+ unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd;
+ unsigned Op1 =
+ IsMFTR ? rd
+ : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg()
+ : Inst.getOperand(0).getReg());
+
+ TOut.emitRRIII(IsMFTR ? Mips::MFTR : Mips::MTTR, Op0, Op1, u, sel, h, IDLoc,
+ STI);
+ return false;
+}
+
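A hand-worked trace of one expansion, derived from the switch above rather than taken from the patch: for the input `mftc0 $4, $5, 2`, the MFTC0 case sets IsMFTR, the MTTC0 case it falls into sets u = 0 and maps COP0 register 5 to the GPR A1 ($5) via getRegisterForMxtrC0, and sel comes from the third operand, so the streamer emits:

    mftr $4, $5, 0, 2, 0   # operand order: rd, rt, u, sel, h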
unsigned
MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst,
const OperandVector &Operands) {
@@ -6329,6 +6554,39 @@ bool MipsAsmParser::parseSetNoOddSPRegDirective() {
return false;
}
+bool MipsAsmParser::parseSetMtDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat "mt".
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ setFeatureBits(Mips::FeatureMT, "mt");
+ getTargetStreamer().emitDirectiveSetMt();
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoMtDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat "nomt".
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ clearFeatureBits(Mips::FeatureMT, "mt");
+
+ getTargetStreamer().emitDirectiveSetNoMt();
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
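A short usage sketch for the new directives; the input is illustrative and assumes the FeatureMT wiring above:

    .set    mt              # MT ASE instructions assemble from here on
    fork    $2, $3, $4
    .set    nomt            # and are rejected again after this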
bool MipsAsmParser::parseSetPopDirective() {
MCAsmParser &Parser = getParser();
SMLoc Loc = getLexer().getLoc();
@@ -6829,6 +7087,10 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetMsaDirective();
} else if (Tok.getString() == "nomsa") {
return parseSetNoMsaDirective();
+ } else if (Tok.getString() == "mt") {
+ return parseSetMtDirective();
+ } else if (Tok.getString() == "nomt") {
+ return parseSetNoMtDirective();
} else if (Tok.getString() == "softfloat") {
return parseSetSoftFloatDirective();
} else if (Tok.getString() == "hardfloat") {
@@ -7078,6 +7340,7 @@ bool MipsAsmParser::parseSSectionDirective(StringRef Section, unsigned Type) {
/// ::= .module fp=value
/// ::= .module softfloat
/// ::= .module hardfloat
+/// ::= .module mt
bool MipsAsmParser::parseDirectiveModule() {
MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
@@ -7177,6 +7440,25 @@ bool MipsAsmParser::parseDirectiveModule() {
}
return false; // parseDirectiveModule has finished successfully.
+ } else if (Option == "mt") {
+ setModuleFeatureBits(Mips::FeatureMT, "mt");
+
+ // Synchronize the ABI Flags information with the FeatureBits information we
+ // updated above.
+ getTargetStreamer().updateABIInfo(*this);
+
+ // If printing assembly, use the recently updated ABI Flags information.
+ // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+ // emitted later).
+ getTargetStreamer().emitDirectiveModuleMT();
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ return false; // parseDirectiveModule has finished successfully.
} else {
return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index f385410270231..9abd4f1d6b08c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -159,6 +159,8 @@ public:
ASESet |= Mips::AFL_ASE_MICROMIPS;
if (P.inMips16Mode())
ASESet |= Mips::AFL_ASE_MIPS16;
+ if (P.hasMT())
+ ASESet |= Mips::AFL_ASE_MT;
}
template <class PredicateLibrary>
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index ae48d6e38fa0f..a1ed0ea4d7f36 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -238,7 +238,7 @@ static unsigned calculateMMLEIndex(unsigned i) {
void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
- bool IsPCRel) const {
+ bool IsResolved) const {
MCFixupKind Kind = Fixup.getKind();
MCContext &Ctx = Asm.getContext();
Value = adjustFixupValue(Fixup, Value, Ctx);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index bf3b290b7ed53..8ebde3b9b7a4e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -40,7 +40,7 @@ public:
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsResolved) const override;
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 0cd4aebe4d164..7caeb08589af6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -50,6 +50,8 @@ void MipsTargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMsa() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoMsa() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMt() {}
+void MipsTargetStreamer::emitDirectiveSetNoMt() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetAtWithArg(unsigned RegNo) {
forbidModuleDirective();
@@ -118,6 +120,7 @@ void MipsTargetStreamer::emitDirectiveModuleOddSPReg() {
}
void MipsTargetStreamer::emitDirectiveModuleSoftFloat() {}
void MipsTargetStreamer::emitDirectiveModuleHardFloat() {}
+void MipsTargetStreamer::emitDirectiveModuleMT() {}
void MipsTargetStreamer::emitDirectiveSetFp(
MipsABIFlagsSection::FpABIKind Value) {
forbidModuleDirective();
@@ -190,6 +193,21 @@ void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1,
emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI);
}
+void MipsTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0,
+ unsigned Reg1, int16_t Imm0, int16_t Imm1,
+ int16_t Imm2, SMLoc IDLoc,
+ const MCSubtargetInfo *STI) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ TmpInst.addOperand(MCOperand::createReg(Reg0));
+ TmpInst.addOperand(MCOperand::createReg(Reg1));
+ TmpInst.addOperand(MCOperand::createImm(Imm0));
+ TmpInst.addOperand(MCOperand::createImm(Imm1));
+ TmpInst.addOperand(MCOperand::createImm(Imm2));
+ TmpInst.setLoc(IDLoc);
+ getStreamer().EmitInstruction(TmpInst, *STI);
+}
+
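For reference, a sketch of how the helper is meant to be driven; it mirrors the call in MipsAsmParser::expandMXTRAlias, with illustrative register choices:

    // Emit "mftr $4, $5, 0, 2, 0": two registers followed by the
    // u/sel/h immediates, in the operand order MFTR_DESC declares.
    TOut.emitRRIII(Mips::MFTR, Mips::A0, Mips::A1,
                   /*Imm0=*/0, /*Imm1=*/2, /*Imm2=*/0, IDLoc, STI);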
void MipsTargetStreamer::emitAddu(unsigned DstReg, unsigned SrcReg,
unsigned TrgReg, bool Is64Bit,
const MCSubtargetInfo *STI) {
@@ -392,6 +410,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetNoMsa() {
MipsTargetStreamer::emitDirectiveSetNoMsa();
}
+void MipsTargetAsmStreamer::emitDirectiveSetMt() {
+ OS << "\t.set\tmt\n";
+ MipsTargetStreamer::emitDirectiveSetMt();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetNoMt() {
+ OS << "\t.set\tnomt\n";
+ MipsTargetStreamer::emitDirectiveSetNoMt();
+}
+
void MipsTargetAsmStreamer::emitDirectiveSetAt() {
OS << "\t.set\tat\n";
MipsTargetStreamer::emitDirectiveSetAt();
@@ -656,6 +684,10 @@ void MipsTargetAsmStreamer::emitDirectiveModuleHardFloat() {
OS << "\t.module\thardfloat\n";
}
+void MipsTargetAsmStreamer::emitDirectiveModuleMT() {
+ OS << "\t.module\tmt\n";
+}
+
// This part is for ELF object output.
MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index f24761d7d1013..d2f0fdcc6cc11 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -188,6 +188,8 @@ def FeatureUseTCCInDIV : SubtargetFeature<
def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true",
"Disable 4-operand madd.fmt and related instructions">;
+def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">;
+
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 40078fb771442..89a5854bede0f 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -240,7 +240,8 @@ def HasMSA : Predicate<"Subtarget->hasMSA()">,
AssemblerPredicate<"FeatureMSA">;
def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">,
AssemblerPredicate<"!FeatureMadd4">;
-
+def HasMT : Predicate<"Subtarget->hasMT()">,
+ AssemblerPredicate<"FeatureMT">;
//===----------------------------------------------------------------------===//
// Mips GPR size adjectives.
@@ -382,6 +383,10 @@ class ASE_MSA64 {
list<Predicate> InsnPredicates = [HasMSA, HasMips64];
}
+class ASE_MT {
+ list <Predicate> InsnPredicates = [HasMT];
+}
+
// Class used for separating microMIPSr6 and microMIPS (r3) instruction.
// It can be used only on instructions that doesn't inherit PredicateControl.
class ISA_MICROMIPS_NOT_32R6_64R6 : PredicateControl {
@@ -2919,6 +2924,10 @@ include "MipsMSAInstrInfo.td"
include "MipsEVAInstrFormats.td"
include "MipsEVAInstrInfo.td"
+// MT
+include "MipsMTInstrFormats.td"
+include "MipsMTInstrInfo.td"
+
// Micromips
include "MicroMipsInstrFormats.td"
include "MicroMipsInstrInfo.td"
diff --git a/lib/Target/Mips/MipsMTInstrFormats.td b/lib/Target/Mips/MipsMTInstrFormats.td
new file mode 100644
index 0000000000000..edc0981e6278a
--- /dev/null
+++ b/lib/Target/Mips/MipsMTInstrFormats.td
@@ -0,0 +1,99 @@
+//===-- MipsMTInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe the MIPS MT instruction formats.
+//
+// opcode - operation code.
+// rt - destination register.
+//
+//===----------------------------------------------------------------------===//
+
+class MipsMTInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
+ PredicateControl {
+ let DecoderNamespace = "Mips";
+ let EncodingPredicates = [HasStdEnc];
+}
+
+class OPCODE1<bits<1> Val> {
+ bits<1> Value = Val;
+}
+
+def OPCODE_SC_D : OPCODE1<0b0>;
+def OPCODE_SC_E : OPCODE1<0b1>;
+
+class FIELD5<bits<5> Val> {
+ bits<5> Value = Val;
+}
+
+def FIELD5_1_DMT_EMT : FIELD5<0b00001>;
+def FIELD5_2_DMT_EMT : FIELD5<0b01111>;
+def FIELD5_1_2_DVPE_EVPE : FIELD5<0b00000>;
+def FIELD5_MFTR : FIELD5<0b01000>;
+def FIELD5_MTTR : FIELD5<0b01100>;
+
+class COP0_MFMC0_MT<FIELD5 Op1, FIELD5 Op2, OPCODE1 sc> : MipsMTInst {
+ bits<32> Inst;
+
+ bits<5> rt;
+ let Inst{31-26} = 0b010000; // COP0
+ let Inst{25-21} = 0b01011; // MFMC0
+ let Inst{20-16} = rt;
+ let Inst{15-11} = Op1.Value;
+ let Inst{10-6} = Op2.Value;
+ let Inst{5} = sc.Value;
+ let Inst{4-3} = 0b00;
+ let Inst{2-0} = 0b001;
+}
+
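To make the field layout concrete, here is a hand-assembled MFMC0 example; the hex words are derived from the bit fields above, not quoted from the patch:

    dmt $zero  ->  010000 01011 00000 00001 01111 0 00 001  =  0x41600bc1
    emt $zero  ->  same fields with sc = 1                  =  0x41600be1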
+class COP0_MFTTR_MT<FIELD5 Op> : MipsMTInst {
+ bits<32> Inst;
+
+ bits<5> rt;
+ bits<5> rd;
+ bits<1> u;
+ bits<1> h;
+ bits<3> sel;
+ let Inst{31-26} = 0b010000; // COP0
+ let Inst{25-21} = Op.Value; // MFTR/MTTR
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0b00000; // rx - currently unsupported.
+ let Inst{5} = u;
+ let Inst{4} = h;
+ let Inst{3} = 0b0;
+ let Inst{2-0} = sel;
+}
+
+class SPECIAL3_MT_FORK : MipsMTInst {
+ bits<32> Inst;
+
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> rd;
+ let Inst{31-26} = 0b011111; // SPECIAL3
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0b00000;
+ let Inst{5-0} = 0b001000; // FORK
+}
+
+class SPECIAL3_MT_YIELD : MipsMTInst {
+ bits<32> Inst;
+
+ bits<5> rs;
+ bits<5> rd;
+ let Inst{31-26} = 0b011111; // SPECIAL3
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0b00000;
+ let Inst{5-0} = 0b001001; // YIELD
+}
diff --git a/lib/Target/Mips/MipsMTInstrInfo.td b/lib/Target/Mips/MipsMTInstrInfo.td
new file mode 100644
index 0000000000000..72e626cbec40a
--- /dev/null
+++ b/lib/Target/Mips/MipsMTInstrInfo.td
@@ -0,0 +1,208 @@
+//===-- MipsMTInstrInfo.td - Mips MT Instruction Infos -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the MIPS MT ASE as defined by MD00378 1.12.
+//
+// TODO: Add support for the microMIPS encodings for the MT ASE and add the
+// instruction mappings.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MIPS MT Instruction Encodings
+//===----------------------------------------------------------------------===//
+
+class DMT_ENC : COP0_MFMC0_MT<FIELD5_1_DMT_EMT, FIELD5_2_DMT_EMT,
+ OPCODE_SC_D>;
+
+class EMT_ENC : COP0_MFMC0_MT<FIELD5_1_DMT_EMT, FIELD5_2_DMT_EMT,
+ OPCODE_SC_E>;
+
+class DVPE_ENC : COP0_MFMC0_MT<FIELD5_1_2_DVPE_EVPE, FIELD5_1_2_DVPE_EVPE,
+ OPCODE_SC_D>;
+
+class EVPE_ENC : COP0_MFMC0_MT<FIELD5_1_2_DVPE_EVPE, FIELD5_1_2_DVPE_EVPE,
+ OPCODE_SC_E>;
+
+class FORK_ENC : SPECIAL3_MT_FORK;
+
+class YIELD_ENC : SPECIAL3_MT_YIELD;
+
+class MFTR_ENC : COP0_MFTTR_MT<FIELD5_MFTR>;
+
+class MTTR_ENC : COP0_MFTTR_MT<FIELD5_MTTR>;
+
+//===----------------------------------------------------------------------===//
+// MIPS MT Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+class MT_1R_DESC_BASE<string instr_asm, InstrItinClass Itin = NoItinerary> {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins);
+ string AsmString = !strconcat(instr_asm, "\t$rt");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
+}
+
+class MFTR_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h);
+ string AsmString = "mftr\t$rd, $rt, $u, $sel, $h";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_MFTR;
+}
+
+class MTTR_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h);
+ string AsmString = "mttr\t$rt, $rd, $u, $sel, $h";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_MTTR;
+}
+
+class FORK_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rs, GPR32Opnd:$rd);
+ dag InOperandList = (ins GPR32Opnd:$rt);
+ string AsmString = "fork\t$rd, $rs, $rt";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_FORK;
+}
+
+class YIELD_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins GPR32Opnd:$rs);
+ string AsmString = "yield\t$rd, $rs";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_YIELD;
+}
+
+class DMT_DESC : MT_1R_DESC_BASE<"dmt", II_DMT>;
+
+class EMT_DESC : MT_1R_DESC_BASE<"emt", II_EMT>;
+
+class DVPE_DESC : MT_1R_DESC_BASE<"dvpe", II_DVPE>;
+
+class EVPE_DESC : MT_1R_DESC_BASE<"evpe", II_EVPE>;
+
+//===----------------------------------------------------------------------===//
+// MIPS MT Instruction Definitions
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 1, isNotDuplicable = 1,
+ AdditionalPredicates = [NotInMicroMips] in {
+ def DMT : DMT_ENC, DMT_DESC, ASE_MT;
+
+ def EMT : EMT_ENC, EMT_DESC, ASE_MT;
+
+ def DVPE : DVPE_ENC, DVPE_DESC, ASE_MT;
+
+ def EVPE : EVPE_ENC, EVPE_DESC, ASE_MT;
+
+ def FORK : FORK_ENC, FORK_DESC, ASE_MT;
+
+ def YIELD : YIELD_ENC, YIELD_DESC, ASE_MT;
+
+ def MFTR : MFTR_ENC, MFTR_DESC, ASE_MT;
+
+ def MTTR : MTTR_ENC, MTTR_DESC, ASE_MT;
+}
+
+//===----------------------------------------------------------------------===//
+// MIPS MT Pseudo Instructions - used to support mftr & mttr aliases.
+//===----------------------------------------------------------------------===//
+def MFTC0 : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins COP0Opnd:$rt,
+ uimm3:$sel),
+ "mftc0 $rd, $rt, $sel">, ASE_MT;
+
+def MFTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rt,
+ uimm3:$sel),
+ "mftgpr $rd, $rt">, ASE_MT;
+
+def MFTLO : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac),
+ "mftlo $rt, $ac">, ASE_MT;
+
+def MFTHI : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac),
+ "mfthi $rt, $ac">, ASE_MT;
+
+def MFTACX : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac),
+ "mftacx $rt, $ac">, ASE_MT;
+
+def MFTDSP : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins),
+ "mftdsp $rt">, ASE_MT;
+
+def MFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft),
+ "mftc1 $rt, $ft">, ASE_MT;
+
+def MFTHC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft),
+ "mfthc1 $rt, $ft">, ASE_MT;
+
+def CFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGRCCOpnd:$ft),
+ "cftc1 $rt, $ft">, ASE_MT;
+
+
+def MTTC0 : MipsAsmPseudoInst<(outs COP0Opnd:$rd), (ins GPR32Opnd:$rt,
+ uimm3:$sel),
+ "mttc0 $rt, $rd, $sel">, ASE_MT;
+
+def MTTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins GPR32Opnd:$rd),
+ "mttgpr $rd, $rt">, ASE_MT;
+
+def MTTLO : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt),
+ "mttlo $rt, $ac">, ASE_MT;
+
+def MTTHI : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt),
+ "mtthi $rt, $ac">, ASE_MT;
+
+def MTTACX : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt),
+ "mttacx $rt, $ac">, ASE_MT;
+
+def MTTDSP : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rt),
+ "mttdsp $rt">, ASE_MT;
+
+def MTTC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt),
+ "mttc1 $rt, $ft">, ASE_MT;
+
+def MTTHC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt),
+ "mtthc1 $rt, $ft">, ASE_MT;
+
+def CTTC1 : MipsAsmPseudoInst<(outs FGRCCOpnd:$ft), (ins GPR32Opnd:$rt),
+ "cttc1 $rt, $ft">, ASE_MT;
+
+//===----------------------------------------------------------------------===//
+// MIPS MT Instruction Aliases
+//===----------------------------------------------------------------------===//
+
+let AdditionalPredicates = [NotInMicroMips] in {
+ def : MipsInstAlias<"dmt", (DMT ZERO), 1>, ASE_MT;
+
+ def : MipsInstAlias<"emt", (EMT ZERO), 1>, ASE_MT;
+
+ def : MipsInstAlias<"dvpe", (DVPE ZERO), 1>, ASE_MT;
+
+ def : MipsInstAlias<"evpe", (EVPE ZERO), 1>, ASE_MT;
+
+ def : MipsInstAlias<"yield $rs", (YIELD ZERO, GPR32Opnd:$rs), 1>, ASE_MT;
+
+ def : MipsInstAlias<"mftc0 $rd, $rt", (MFTC0 GPR32Opnd:$rd, COP0Opnd:$rt, 0),
+ 1>, ASE_MT;
+
+ def : MipsInstAlias<"mftlo $rt", (MFTLO GPR32Opnd:$rt, AC0), 1>, ASE_MT;
+
+ def : MipsInstAlias<"mfthi $rt", (MFTHI GPR32Opnd:$rt, AC0), 1>, ASE_MT;
+
+ def : MipsInstAlias<"mftacx $rt", (MFTACX GPR32Opnd:$rt, AC0), 1>, ASE_MT;
+
+ def : MipsInstAlias<"mttc0 $rd, $rt", (MTTC0 COP0Opnd:$rt, GPR32Opnd:$rd, 0),
+ 1>, ASE_MT;
+
+ def : MipsInstAlias<"mttlo $rt", (MTTLO AC0, GPR32Opnd:$rt), 1>, ASE_MT;
+
+ def : MipsInstAlias<"mtthi $rt", (MTTHI AC0, GPR32Opnd:$rt), 1>, ASE_MT;
+
+ def : MipsInstAlias<"mttacx $rt", (MTTACX AC0, GPR32Opnd:$rt), 1>, ASE_MT;
+}
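As a worked example of these aliases, derived from the definitions above together with expandMXTRAlias in MipsAsmParser.cpp (the final form is our derivation, not quoted from the patch): `mftlo $5` first expands to `mftlo $5, $ac0`, which the parser then lowers to the real instruction as

    mftr $5, $zero, 1, 1, 0   # u=1, sel=1 selects the DSP view; ac0.lo reads through $zero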
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index c0de59ba15f55..8ec55ab6284da 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -84,6 +84,7 @@ def II_DIVU : InstrItinClass;
def II_DIV_D : InstrItinClass;
def II_DIV_S : InstrItinClass;
def II_DMFC0 : InstrItinClass;
+def II_DMT : InstrItinClass;
def II_DMTC0 : InstrItinClass;
def II_DMFC1 : InstrItinClass;
def II_DMTC1 : InstrItinClass;
@@ -113,8 +114,12 @@ def II_DSBH : InstrItinClass;
def II_DSHD : InstrItinClass;
def II_DSUBU : InstrItinClass;
def II_DSUB : InstrItinClass;
+def II_DVPE : InstrItinClass;
+def II_EMT : InstrItinClass;
+def II_EVPE : InstrItinClass;
def II_EXT : InstrItinClass; // Any EXT instruction
def II_FLOOR : InstrItinClass;
+def II_FORK : InstrItinClass;
def II_INS : InstrItinClass; // Any INS instruction
def II_IndirectBranchPseudo : InstrItinClass; // Indirect branch pseudo.
def II_J : InstrItinClass;
@@ -221,6 +226,7 @@ def II_MFC1 : InstrItinClass;
def II_MFHC1 : InstrItinClass;
def II_MFC2 : InstrItinClass;
def II_MFHI_MFLO : InstrItinClass; // mfhi and mflo
+def II_MFTR : InstrItinClass;
def II_MOD : InstrItinClass;
def II_MODU : InstrItinClass;
def II_MOVE : InstrItinClass;
@@ -250,6 +256,7 @@ def II_MTC1 : InstrItinClass;
def II_MTHC1 : InstrItinClass;
def II_MTC2 : InstrItinClass;
def II_MTHI_MTLO : InstrItinClass; // mthi and mtlo
+def II_MTTR : InstrItinClass;
def II_MUL : InstrItinClass;
def II_MUH : InstrItinClass;
def II_MUHU : InstrItinClass;
@@ -345,6 +352,7 @@ def II_WRPGPR : InstrItinClass;
def II_RDPGPR : InstrItinClass;
def II_DVP : InstrItinClass;
def II_EVP : InstrItinClass;
+def II_YIELD : InstrItinClass;
//===----------------------------------------------------------------------===//
// Mips Generic instruction itineraries.
@@ -386,6 +394,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_DCLZ , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DMOD , [InstrStage<17, [IMULDIV]>]>,
InstrItinData<II_DMODU , [InstrStage<17, [IMULDIV]>]>,
+ InstrItinData<II_DMT , [InstrStage<2, [ALU]>]>,
InstrItinData<II_DSLL , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DSLL32 , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DSRL , [InstrStage<1, [ALU]>]>,
@@ -404,7 +413,11 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_DSHD , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DCLO , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DCLZ , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_DVPE , [InstrStage<2, [ALU]>]>,
+ InstrItinData<II_EMT , [InstrStage<2, [ALU]>]>,
+ InstrItinData<II_EVPE , [InstrStage<2, [ALU]>]>,
InstrItinData<II_EXT , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_FORK , [InstrStage<1, [ALU]>]>,
InstrItinData<II_INS , [InstrStage<1, [ALU]>]>,
InstrItinData<II_LUI , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVE , [InstrStage<1, [ALU]>]>,
@@ -653,12 +666,14 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_MFHC0 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFC2 , [InstrStage<2, [ALU]>]>,
+ InstrItinData<II_MFTR , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTC0 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTHC0 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTC2 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFHC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTHC1 , [InstrStage<2, [ALU]>]>,
+ InstrItinData<II_MTTR , [InstrStage<2, [ALU]>]>,
InstrItinData<II_CACHE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_PREF , [InstrStage<1, [ALU]>]>,
InstrItinData<II_CACHEE , [InstrStage<1, [ALU]>]>,
@@ -670,5 +685,6 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_WRPGPR , [InstrStage<1, [ALU]>]>,
InstrItinData<II_RDPGPR , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DVP , [InstrStage<1, [ALU]>]>,
- InstrItinData<II_EVP , [InstrStage<1, [ALU]>]>
+ InstrItinData<II_EVP , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_YIELD , [InstrStage<5, [ALU]>]>
]>;
diff --git a/lib/Target/Mips/MipsScheduleGeneric.td b/lib/Target/Mips/MipsScheduleGeneric.td
index 15a0401b781e5..89cda676441e7 100644
--- a/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/lib/Target/Mips/MipsScheduleGeneric.td
@@ -187,7 +187,11 @@ def GenericIssueCOP0 : ProcResource<1> { let Super = GenericCOP0; }
def GenericWriteCOP0TLB : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 4; }
def GenericWriteCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 3; }
def GenericReadCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 2; }
-def GnereicReadWritePGPR : SchedWriteRes<[GenericIssueCOP0]>;
+def GenericReadWritePGPR : SchedWriteRes<[GenericIssueCOP0]>;
+def GenericReadWriteCOP0Long : SchedWriteRes<[GenericIssueCOP0]> {
+ let Latency = 5;
+}
+def GenericWriteCOP0Short : SchedWriteRes<[GenericIssueCOP0]>;
def : ItinRW<[GenericWriteCOP0TLB], [II_TLBP, II_TLBR, II_TLBWI, II_TLBWR]>;
def : ItinRW<[GenericWriteCOP0TLB], [II_TLBINV, II_TLBINVF]>;
@@ -261,6 +265,14 @@ def : ItinRW<[GenericWriteLoad], [II_LBE, II_LBUE, II_LHE, II_LHUE, II_LWE,
def : ItinRW<[GenericWriteLoad], [II_LWLE, II_LWRE]>;
+// MIPS MT instructions
+// ====================
+
+def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE]>;
+
+def : ItinRW<[GenericReadWriteCOP0Long], [II_YIELD]>;
+def : ItinRW<[GenericWriteCOP0Short], [II_FORK]>;
+
// MIPS32R6 and MIPS16e
// ====================
diff --git a/lib/Target/Mips/MipsScheduleP5600.td b/lib/Target/Mips/MipsScheduleP5600.td
index 882a241d1426a..fedfac24e4e74 100644
--- a/lib/Target/Mips/MipsScheduleP5600.td
+++ b/lib/Target/Mips/MipsScheduleP5600.td
@@ -19,7 +19,7 @@ def MipsP5600Model : SchedMachineModel {
HasMips64, HasMips64r2, HasCnMips,
InMicroMips, InMips16Mode,
HasMicroMips32r6, HasMicroMips64r6,
- HasDSP, HasDSPR2];
+ HasDSP, HasDSPR2, HasMT];
}
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 154d5825427b3..eba21e0a1c672 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -70,7 +70,8 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
- HasEVA(false), DisableMadd4(false), TM(TM), TargetTriple(TT), TSInfo(),
+ HasEVA(false), DisableMadd4(false), HasMT(false), TM(TM),
+ TargetTriple(TT), TSInfo(),
InstrInfo(
MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index ccd47f00c0d3b..7619e7b08612b 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -149,6 +149,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// related instructions.
bool DisableMadd4;
+ // HasMT -- support MT ASE.
+ bool HasMT;
+
InstrItineraryData InstrItins;
// We can override the determination of whether we are in mips16 mode
@@ -259,6 +262,7 @@ public:
bool hasMSA() const { return HasMSA; }
bool disableMadd4() const { return DisableMadd4; }
bool hasEVA() const { return HasEVA; }
+ bool hasMT() const { return HasMT; }
bool useSmallSection() const { return UseSmallSection; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 41ebe411b98d9..af24838665e1f 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -40,6 +40,8 @@ public:
virtual void emitDirectiveSetNoMacro();
virtual void emitDirectiveSetMsa();
virtual void emitDirectiveSetNoMsa();
+ virtual void emitDirectiveSetMt();
+ virtual void emitDirectiveSetNoMt();
virtual void emitDirectiveSetAt();
virtual void emitDirectiveSetAtWithArg(unsigned RegNo);
virtual void emitDirectiveSetNoAt();
@@ -96,6 +98,7 @@ public:
virtual void emitDirectiveModuleOddSPReg();
virtual void emitDirectiveModuleSoftFloat();
virtual void emitDirectiveModuleHardFloat();
+ virtual void emitDirectiveModuleMT();
virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value);
virtual void emitDirectiveSetOddSPReg();
virtual void emitDirectiveSetNoOddSPReg();
@@ -116,6 +119,9 @@ public:
SMLoc IDLoc, const MCSubtargetInfo *STI);
void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm,
SMLoc IDLoc, const MCSubtargetInfo *STI);
+ void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0,
+ int16_t Imm1, int16_t Imm2, SMLoc IDLoc,
+ const MCSubtargetInfo *STI);
void emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit,
const MCSubtargetInfo *STI);
void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount,
@@ -204,6 +210,8 @@ public:
void emitDirectiveSetNoMacro() override;
void emitDirectiveSetMsa() override;
void emitDirectiveSetNoMsa() override;
+ void emitDirectiveSetMt() override;
+ void emitDirectiveSetNoMt() override;
void emitDirectiveSetAt() override;
void emitDirectiveSetAtWithArg(unsigned RegNo) override;
void emitDirectiveSetNoAt() override;
@@ -267,6 +275,7 @@ public:
void emitDirectiveModuleOddSPReg() override;
void emitDirectiveModuleSoftFloat() override;
void emitDirectiveModuleHardFloat() override;
+ void emitDirectiveModuleMT() override;
void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override;
void emitDirectiveSetOddSPReg() override;
void emitDirectiveSetNoOddSPReg() override;
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f26b9a7cb8dd3..f800d91f40933 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -62,7 +62,6 @@
#include <utility>
#include <vector>
-#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"
using namespace llvm;
@@ -2456,7 +2455,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// v2f16 was loaded as an i32. Now we must bitcast it back.
else if (EltVT == MVT::v2f16)
Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
- // Extend the element if necesary (e.g. an i8 is loaded
+ // Extend the element if necessary (e.g. an i8 is loaded
// into an i16 register)
if (Ins[InsIdx].VT.isInteger() &&
Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) {
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 3be291b48b8f2..989f0a3aba2f6 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -42,6 +43,7 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<StackProtector>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool runOnFunction(Function &F) override;
@@ -61,6 +63,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
const DataLayout &DL = F.getParent()->getDataLayout();
LLVMContext &Context = F.getParent()->getContext();
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
// Collect all aggregate loads and mem* calls.
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
@@ -104,15 +108,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
Value *SrcAddr = LI->getOperand(0);
Value *DstAddr = SI->getOperand(1);
unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
- Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
-
- createMemCpyLoop(/* ConvertedInst */ SI,
- /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
- /* CopyLen */ CopyLen,
- /* SrcAlign */ LI->getAlignment(),
- /* DestAlign */ SI->getAlignment(),
- /* SrcIsVolatile */ LI->isVolatile(),
- /* DstIsVolatile */ SI->isVolatile());
+ ConstantInt *CopyLen =
+ ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
+
+ if (!TTI.useWideIRMemcpyLoopLowering()) {
+ createMemCpyLoop(/* ConvertedInst */ SI,
+ /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
+ /* CopyLen */ CopyLen,
+ /* SrcAlign */ LI->getAlignment(),
+ /* DestAlign */ SI->getAlignment(),
+ /* SrcIsVolatile */ LI->isVolatile(),
+ /* DstIsVolatile */ SI->isVolatile());
+ } else {
+ createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
+ /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
+ /* CopyLen */ CopyLen,
+ /* SrcAlign */ LI->getAlignment(),
+ /* DestAlign */ SI->getAlignment(),
+ /* SrcIsVolatile */ LI->isVolatile(),
+ /* DstIsVolatile */ SI->isVolatile(), TTI);
+ }
SI->eraseFromParent();
LI->eraseFromParent();
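The branch above chooses between the legacy byte-wise lowering and the new TTI-driven wide lowering. A rough C++ illustration of the difference follows; it is a hedged sketch, not the pass's actual IR output, and the i32 op type (4-byte width) is an assumption for the example:

```cpp
#include <cstdint>
#include <cstring>

// Legacy lowering: one i8 element copied per loop iteration.
void byteWiseCopy(uint8_t *Dst, const uint8_t *Src, unsigned Len) {
  for (unsigned I = 0; I < Len; ++I)
    Dst[I] = Src[I];
}

// Wide lowering: TTI chooses a wider op type (assume i32 here); the main
// loop copies 4 bytes per iteration and a residual loop handles the tail.
void wideCopy(uint8_t *Dst, const uint8_t *Src, unsigned Len) {
  unsigned I = 0;
  for (; I + 4 <= Len; I += 4) {
    uint32_t W;
    std::memcpy(&W, Src + I, 4); // wide load
    std::memcpy(Dst + I, &W, 4); // wide store
  }
  for (; I < Len; ++I) // residual bytes
    Dst[I] = Src[I];
}
```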
@@ -121,7 +136,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
// Transform mem* intrinsic calls.
for (MemIntrinsic *MemCall : MemCalls) {
if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
- expandMemCpyAsLoop(Memcpy);
+ expandMemCpyAsLoop(Memcpy, TTI);
} else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
expandMemMoveAsLoop(Memmove);
} else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 7393f3d7a08a9..bdad2fe8714fd 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -115,7 +115,7 @@ public:
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override {
+ uint64_t Value, bool IsResolved) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 094d3e6a61b5a..53f33ac1fc0ed 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -607,7 +607,10 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// The old condition may be dead now, and may have even created a dead PHI
// (the original induction variable).
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- DeleteDeadPHIs(CountedExitBlock);
+ // Run through the basic blocks of the loop and see if any of them have dead
+ // PHIs that can be removed.
+ for (auto I : L->blocks())
+ DeleteDeadPHIs(I);
++NumCTRLoops;
return MadeChange;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index c2c115cb6dafa..b49c3345a17dd 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -435,22 +435,19 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- // If we are a leaf function, and use up to 224 bytes of stack space,
- // don't have a frame pointer, calls, or dynamic alloca then we do not need
- // to adjust the stack pointer (we fit in the Red Zone).
- // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
- // stackless code if all local vars are reg-allocated.
- bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
unsigned LR = RegInfo->getRARegister();
- if (!DisableRedZone &&
- (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
- !Subtarget.isSVR4ABI() || // allocated locals.
- FrameSize == 0) &&
- FrameSize <= 224 && // Fits in red zone.
- !MFI.hasVarSizedObjects() && // No dynamic alloca.
- !MFI.adjustsStack() && // No calls.
- !MustSaveLR(MF, LR) &&
- !RegInfo->hasBasePointer(MF)) { // No special alignment.
+ bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+ bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
+ !MFI.adjustsStack() && // No calls.
+ !MustSaveLR(MF, LR) && // No need to save LR.
+ !RegInfo->hasBasePointer(MF); // No special alignment.
+
+ // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
+ // code if all local vars are reg-allocated.
+ bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
+
+ // Check whether we can skip adjusting the stack pointer (by using red zone)
+ if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
// No need for frame
if (UpdateMF)
MFI.setStackSize(0);
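For concreteness, a worked example of the rewritten check; the values are hypothetical, and getRedZoneSize() is the helper this patch adds to PPCSubtarget.h further down:

```cpp
// A PPC64 SVR4 leaf function: 280-byte frame, no dynamic alloca, no calls,
// no need to save LR, no base pointer, and no NoRedZone attribute.
bool DisableRedZone = false;
bool CanUseRedZone = true;   // all four sub-conditions above hold
unsigned FrameSize = 280;
unsigned RedZoneSize = 288;  // PPC64 SVR4 red zone, see PPCSubtarget.h below
bool SkipAdjust = !DisableRedZone && CanUseRedZone && FrameSize <= RedZoneSize;
// SkipAdjust == true, so MFI.setStackSize(0) is reached and no frame is built.
```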
@@ -1869,8 +1866,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
if (HasVRSaveArea) {
- // Insert alignment padding, we need 16-byte alignment.
- LowerBound = (LowerBound - 15) & ~(15);
+    // Insert alignment padding, we need 16-byte alignment. Note: for a
+    // positive number the alignment formula is y = (x + (n-1)) & ~(n-1).
+    // Since we are using a negative number here (the stack grows downward),
+    // the formula becomes y = x & ~(n-1), where x is the size before
+    // aligning, n is the alignment size (n = 16 here) and y is the size
+    // after aligning.
+ assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
+ LowerBound &= ~(15);
for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
int FI = VRegs[i].getFrameIdx();
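A small self-contained check of the downward-alignment formula from the comment above (assumes nothing beyond standard C++ on a two's-complement target):

```cpp
#include <cassert>

int main() {
  // Stack offsets here are non-positive; y = x & ~(n-1) rounds toward
  // negative infinity, inserting the padding below the current bound.
  int LowerBound = -37;
  LowerBound &= ~(15);       // 16-byte alignment, stack grows downward
  assert(LowerBound == -48); // -37 is padded down to the next 16-byte slot
  return 0;
}
```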
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 535b9deaefac3..3aaf7ef2c2a02 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -419,25 +419,6 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
.getNode();
}
-/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
-/// or 64-bit immediate, and if the value can be accurately represented as a
-/// sign extension from a 16-bit value. If so, this returns true and the
-/// immediate.
-static bool isIntS16Immediate(SDNode *N, short &Imm) {
- if (N->getOpcode() != ISD::Constant)
- return false;
-
- Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
- if (N->getValueType(0) == MVT::i32)
- return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
- else
- return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
-}
-
-static bool isIntS16Immediate(SDValue Op, short &Imm) {
- return isIntS16Immediate(Op.getNode(), Imm);
-}
-
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
@@ -728,7 +709,10 @@ static uint64_t Rot64(uint64_t Imm, unsigned R) {
static unsigned getInt64Count(int64_t Imm) {
unsigned Count = getInt64CountDirect(Imm);
- if (Count == 1)
+
+ // If the instruction count is 1 or 2, we do not need further analysis
+ // since rotate + load constant requires at least 2 instructions.
+ if (Count <= 2)
return Count;
for (unsigned r = 1; r < 63; ++r) {
@@ -838,7 +822,10 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl,
static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) {
unsigned Count = getInt64CountDirect(Imm);
- if (Count == 1)
+
+ // If the instruction count is 1 or 2, we do not need further analysis
+ // since rotate + load constant requires at least 2 instructions.
+ if (Count <= 2)
return getInt64Direct(CurDAG, dl, Imm);
unsigned RMin = 0;
@@ -2126,7 +2113,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
getI32Imm(Imm & 0xFFFF, dl)), 0);
Opc = PPC::CMPLW;
} else {
- short SImm;
+ int16_t SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
getI32Imm((int)SImm & 0xFFFF,
@@ -2173,7 +2160,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
getI64Imm(Imm & 0xFFFF, dl)), 0);
Opc = PPC::CMPLD;
} else {
- short SImm;
+ int16_t SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
getI64Imm(SImm & 0xFFFF, dl)),
@@ -3323,7 +3310,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (tryLogicOpOfCompares(N))
return;
- short Imm;
+ int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
KnownBits LHSKnown;
@@ -3346,7 +3333,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
break;
}
case ISD::ADD: {
- short Imm;
+ int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
@@ -4034,11 +4021,13 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
O0.getNode(), O1.getNode());
};
+ // FIXME: When the semantics of the interaction between select and undef
+ // are clearly defined, it may turn out to be unnecessary to break here.
SDValue TrueRes = TryFold(ConstTrue);
- if (!TrueRes)
+ if (!TrueRes || TrueRes.isUndef())
break;
SDValue FalseRes = TryFold(ConstFalse);
- if (!FalseRes)
+ if (!FalseRes || FalseRes.isUndef())
break;
// For us to materialize these using one instruction, we must be able to
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 72f14e9691382..0e069ec1665f7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -136,6 +136,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
}
+ // Match BITREVERSE to customized fast code sequence in the td file.
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -1168,6 +1172,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
case PPCISD::STXSIX: return "PPCISD::STXSIX";
case PPCISD::VEXTS: return "PPCISD::VEXTS";
+ case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
@@ -2028,17 +2033,17 @@ int PPC::isQVALIGNIShuffleMask(SDNode *N) {
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
-static bool isIntS16Immediate(SDNode *N, short &Imm) {
+bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
if (!isa<ConstantSDNode>(N))
return false;
- Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
if (N->getValueType(0) == MVT::i32)
return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
else
return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
-static bool isIntS16Immediate(SDValue Op, short &Imm) {
+bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
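The predicate is just a 16-bit sign-extension round trip; a minimal standalone sketch of the same test on plain integers (hypothetical helper, not part of the patch):

```cpp
#include <cstdint>

// True iff V is representable as a sign-extended 16-bit immediate.
bool fitsInS16(int64_t V) {
  return V == static_cast<int64_t>(static_cast<int16_t>(V));
}
// fitsInS16(0x7FFF) and fitsInS16(-0x8000) are true;
// fitsInS16(0x8000) and fitsInS16(-0x8001) are false.
```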
@@ -2048,7 +2053,7 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) {
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
- short imm = 0;
+ int16_t imm = 0;
if (N.getOpcode() == ISD::ADD) {
if (isIntS16Immediate(N.getOperand(1), imm))
return false; // r+i
@@ -2138,7 +2143,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
return false;
if (N.getOpcode() == ISD::ADD) {
- short imm = 0;
+ int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Aligned || (imm & 3) == 0)) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
@@ -2162,7 +2167,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
return true; // [&g+r]
}
} else if (N.getOpcode() == ISD::OR) {
- short imm = 0;
+ int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Aligned || (imm & 3) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
@@ -2190,7 +2195,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
- short Imm;
+ int16_t Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
@@ -2235,10 +2240,15 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
if (SelectAddressRegReg(N, Base, Index, DAG))
return true;
- // If the operand is an addition, always emit this as [r+r], since this is
- // better (for code size, and execution, as the memop does the add for free)
- // than emitting an explicit add.
- if (N.getOpcode() == ISD::ADD) {
+  // If the address is the result of an add, we will utilize the fact that the
+  // address calculation includes an implicit add. However, we can reduce
+  // register pressure if we do not materialize a constant just for use as the
+  // index register. Therefore we only drop the explicit add when it is not an
+  // add of a value and a 16-bit signed constant where both operands have a
+  // single use.
+ int16_t imm = 0;
+ if (N.getOpcode() == ISD::ADD &&
+ (!isIntS16Immediate(N.getOperand(1), imm) ||
+ !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
@@ -6422,7 +6432,7 @@ PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
- // Get the corect type for integers.
+ // Get the correct type for integers.
EVT IntVT = Op.getValueType();
// Get the inputs.
@@ -6439,7 +6449,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
// When we pop the dynamic allocation we need to restore the SP link.
SDLoc dl(Op);
- // Get the corect type for pointers.
+ // Get the correct type for pointers.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Construct the stack pointer operand.
@@ -6514,7 +6524,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Size = Op.getOperand(1);
SDLoc dl(Op);
- // Get the corect type for pointers.
+ // Get the correct type for pointers.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Negate the size.
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
@@ -6645,6 +6655,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
default: break; // SETUO etc aren't handled by fsel.
case ISD::SETNE:
std::swap(TV, FV);
+ LLVM_FALLTHROUGH;
case ISD::SETEQ:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
@@ -6656,6 +6667,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETULT:
case ISD::SETLT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ LLVM_FALLTHROUGH;
case ISD::SETOGE:
case ISD::SETGE:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
@@ -6664,6 +6676,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETUGT:
case ISD::SETGT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ LLVM_FALLTHROUGH;
case ISD::SETOLE:
case ISD::SETLE:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
@@ -6677,6 +6690,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
default: break; // SETUO etc aren't handled by fsel.
case ISD::SETNE:
std::swap(TV, FV);
+ LLVM_FALLTHROUGH;
case ISD::SETEQ:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
@@ -11311,6 +11325,132 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// This function adds the vector_shuffle needed to get the elements of the
+// vector extracts into the correct positions, as specified by the
+// CorrectElems encoding.
+static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
+ SDValue Input, uint64_t Elems,
+ uint64_t CorrectElems) {
+ SDLoc dl(N);
+
+ unsigned NumElems = Input.getValueType().getVectorNumElements();
+ SmallVector<int, 16> ShuffleMask(NumElems, -1);
+
+ // Knowing the element indices being extracted from the original
+ // vector and the order in which they're being inserted, just put
+ // them at element indices required for the instruction.
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (DAG.getDataLayout().isLittleEndian())
+ ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
+ else
+ ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
+ CorrectElems = CorrectElems >> 8;
+ Elems = Elems >> 8;
+ }
+
+ SDValue Shuffle =
+ DAG.getVectorShuffle(Input.getValueType(), dl, Input,
+ DAG.getUNDEF(Input.getValueType()), ShuffleMask);
+
+ EVT Ty = N->getValueType(0);
+ SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
+ return BV;
+}
+
+// Look for build vector patterns where the input operands come from
+// sign-extended vector_extract elements of specific indices. If the correct
+// indices aren't used, add a vector shuffle to fix up the indices and create
+// a new PPCISD::SExtVElems node, which selects the vector sign extend
+// instructions during instruction selection.
+static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
+ // This array encodes the indices that the vector sign extend instructions
+ // extract from when extending from one type to another for both BE and LE.
+  // The right nibble of each byte corresponds to the LE indices,
+  // and the left nibble of each byte corresponds to the BE indices.
+ // For example: 0x3074B8FC byte->word
+ // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
+ // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
+ // For example: 0x000070F8 byte->double word
+ // For LE: the allowed indices are: 0x0,0x8
+ // For BE: the allowed indices are: 0x7,0xF
+ uint64_t TargetElems[] = {
+ 0x3074B8FC, // b->w
+ 0x000070F8, // b->d
+ 0x10325476, // h->w
+ 0x00003074, // h->d
+ 0x00001032, // w->d
+ };
+
+ uint64_t Elems = 0;
+ int Index;
+ SDValue Input;
+
+ auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
+ if (!Op)
+ return false;
+ if (Op.getOpcode() != ISD::SIGN_EXTEND)
+ return false;
+
+ SDValue Extract = Op.getOperand(0);
+ if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+ if (!ExtOp)
+ return false;
+
+ Index = ExtOp->getZExtValue();
+ if (Input && Input != Extract.getOperand(0))
+ return false;
+
+ if (!Input)
+ Input = Extract.getOperand(0);
+
+ Elems = Elems << 8;
+ Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
+ Elems |= Index;
+
+ return true;
+ };
+
+  // If the build vector operands aren't sign-extended vector extracts
+  // of the same input vector, then return.
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (!isSExtOfVecExtract(N->getOperand(i))) {
+ return SDValue();
+ }
+ }
+
+  // If the vector extract indices are not correct, add the appropriate
+ // vector_shuffle.
+ int TgtElemArrayIdx;
+ int InputSize = Input.getValueType().getScalarSizeInBits();
+ int OutputSize = N->getValueType(0).getScalarSizeInBits();
+ if (InputSize + OutputSize == 40)
+ TgtElemArrayIdx = 0;
+ else if (InputSize + OutputSize == 72)
+ TgtElemArrayIdx = 1;
+ else if (InputSize + OutputSize == 48)
+ TgtElemArrayIdx = 2;
+ else if (InputSize + OutputSize == 80)
+ TgtElemArrayIdx = 3;
+ else if (InputSize + OutputSize == 96)
+ TgtElemArrayIdx = 4;
+ else
+ return SDValue();
+
+ uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
+ CorrectElems = DAG.getDataLayout().isLittleEndian()
+ ? CorrectElems & 0x0F0F0F0F0F0F0F0F
+ : CorrectElems & 0xF0F0F0F0F0F0F0F0;
+ if (Elems != CorrectElems) {
+ return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
+ }
+
+ // Regular lowering will catch cases where a shuffle is not needed.
+ return SDValue();
+}
+
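To make the nibble encoding concrete, here is a hedged standalone sketch that decodes one TargetElems entry the same way the combine reads it (the printf driver is purely illustrative):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Encoded = 0x3074B8FC; // byte->word entry of TargetElems
  bool IsLittleEndian = true;
  // Each byte holds one allowed extract index: BE in the high nibble, LE in
  // the low nibble. Bytes are consumed from the least-significant end, which
  // corresponds to the last build_vector operand scanned by the combine.
  for (int Elt = 0; Encoded != 0; ++Elt, Encoded >>= 8) {
    unsigned Idx = IsLittleEndian ? unsigned(Encoded & 0xF)
                                  : unsigned((Encoded & 0xF0) >> 4);
    std::printf("element %d extracts lane %u\n", Elt, Idx);
  }
  return 0;
}
// LE prints lanes 0xC, 0x8, 0x4, 0x0 -- the indices listed in the comment.
```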
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -11338,6 +11478,15 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
if (Reduced)
return Reduced;
+ // If we're building a vector out of extended elements from another vector
+ // we have P9 vector integer extend instructions.
+ if (Subtarget.hasP9Altivec()) {
+ Reduced = combineBVOfVecSExt(N, DAG);
+ if (Reduced)
+ return Reduced;
+ }
+
if (N->getValueType(0) != MVT::v2f64)
return SDValue();
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index a5108727bb4b1..821927d3b1576 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -67,6 +67,10 @@ namespace llvm {
/// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.
VEXTS,
+    /// SExtVElems takes an input vector of a smaller type and sign
+    /// extends it to an output vector of a larger type.
+ SExtVElems,
+
/// Reciprocal estimate instructions (unary FP ops).
FRE, FRSQRTE,
@@ -1092,6 +1096,9 @@ namespace llvm {
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
+ bool isIntS16Immediate(SDNode *N, int16_t &Imm);
+ bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+
} // end namespace llvm
#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 47d59c25392a2..6d9f55206b6af 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -32,6 +32,9 @@ def SDT_PPCstxsix : SDTypeProfile<0, 3, [
def SDT_PPCVexts : SDTypeProfile<1, 2, [
SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
]>;
+def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisVec<1>
+]>;
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -131,6 +134,7 @@ def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
[SDNPHasChain, SDNPMayStore]>;
def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
+def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
// Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS",
@@ -4450,3 +4454,190 @@ def MSGSYNC : XForm_0<31, 886, (outs), (ins), "msgsync", IIC_SprMSGSYNC, []>;
def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>;
} // IsISA3_0
+
+// Fast 32-bit reverse bits algorithm:
+// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
+// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
+// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit):
+// n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC);
+// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit):
+// n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0);
+// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4]):
+// Step 4.1: Put B4,B2 in the right position (rotate left 3 bytes):
+// n' = (n rotl 24); After which n' = [B4, B1, B2, B3]
+// Step 4.2: Insert B3 to the right position:
+// n' = rlwimi n', n, 8, 8, 15; After which n' = [B4, B3, B2, B3]
+// Step 4.3: Insert B1 to the right position:
+// n' = rlwimi n', n, 8, 24, 31; After which n' = [B4, B3, B2, B1]
+def MaskValues {
+ dag Lo1 = (ORI (LIS 0x5555), 0x5555);
+ dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA);
+ dag Lo2 = (ORI (LIS 0x3333), 0x3333);
+ dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC);
+ dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F);
+ dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0);
+}
+
+def Shift1 {
+ dag Right = (RLWINM $A, 31, 1, 31);
+ dag Left = (RLWINM $A, 1, 0, 30);
+}
+
+def Swap1 {
+ dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1),
+ (AND Shift1.Left, MaskValues.Hi1));
+}
+
+def Shift2 {
+ dag Right = (RLWINM Swap1.Bit, 30, 2, 31);
+ dag Left = (RLWINM Swap1.Bit, 2, 0, 29);
+}
+
+def Swap2 {
+ dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2),
+ (AND Shift2.Left, MaskValues.Hi2));
+}
+
+def Shift4 {
+ dag Right = (RLWINM Swap2.Bits, 28, 4, 31);
+ dag Left = (RLWINM Swap2.Bits, 4, 0, 27);
+}
+
+def Swap4 {
+ dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4),
+ (AND Shift4.Left, MaskValues.Hi4));
+}
+
+def Rotate {
+ dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31);
+}
+
+def RotateInsertByte3 {
+ dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15);
+}
+
+def RotateInsertByte1 {
+ dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31);
+}
+
+def : Pat<(i32 (bitreverse i32:$A)),
+ (RLDICL_32 RotateInsertByte1.Left, 0, 32)>;
+
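A reference C++ sketch of the same four-step 32-bit bit reversal that the defs above express with rlwinm/rlwimi (plain shifts and masks instead of rotate-and-mask instructions):

```cpp
#include <cstdint>

uint32_t reverseBits32(uint32_t N) {
  N = ((N >> 1) & 0x55555555u) | ((N << 1) & 0xAAAAAAAAu); // 1-bit swap
  N = ((N >> 2) & 0x33333333u) | ((N << 2) & 0xCCCCCCCCu); // 2-bit swap
  N = ((N >> 4) & 0x0F0F0F0Fu) | ((N << 4) & 0xF0F0F0F0u); // 4-bit swap
  // Step 4: byte reverse, [B1,B2,B3,B4] -> [B4,B3,B2,B1].
  return (N << 24) | ((N << 8) & 0x00FF0000u) |
         ((N >> 8) & 0x0000FF00u) | (N >> 24);
}
// reverseBits32(0x00000001) == 0x80000000
```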
+// Fast 64-bit reverse bits algorithm:
+// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
+// n = ((n >> 1) & 0x5555555555555555) | ((n << 1) & 0xAAAAAAAAAAAAAAAA);
+// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit):
+// n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC);
+// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit):
+// n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0);
+// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4,B5,B6,B7,B8]):
+// Apply the same byte reverse algorithm mentioned above for the fast 32-bit
+// reverse to both the high 32 bits and the low 32 bits of the 64-bit value,
+// then OR them together to get the final result.
+def MaskValues64 {
+ dag Lo1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo1, sub_32));
+ dag Hi1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi1, sub_32));
+ dag Lo2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo2, sub_32));
+ dag Hi2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi2, sub_32));
+ dag Lo4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo4, sub_32));
+ dag Hi4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi4, sub_32));
+}
+
+def DWMaskValues {
+ dag Lo1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo1, 32, 31), 0x5555), 0x5555);
+ dag Hi1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi1, 32, 31), 0xAAAA), 0xAAAA);
+ dag Lo2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo2, 32, 31), 0x3333), 0x3333);
+ dag Hi2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi2, 32, 31), 0xCCCC), 0xCCCC);
+ dag Lo4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo4, 32, 31), 0x0F0F), 0x0F0F);
+ dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0);
+}
+
+def DWShift1 {
+ dag Right = (RLDICL $A, 63, 1);
+ dag Left = (RLDICR $A, 1, 62);
+}
+
+def DWSwap1 {
+ dag Bit = (OR8 (AND8 DWShift1.Right, DWMaskValues.Lo1),
+ (AND8 DWShift1.Left, DWMaskValues.Hi1));
+}
+
+def DWShift2 {
+ dag Right = (RLDICL DWSwap1.Bit, 62, 2);
+ dag Left = (RLDICR DWSwap1.Bit, 2, 61);
+}
+
+def DWSwap2 {
+ dag Bits = (OR8 (AND8 DWShift2.Right, DWMaskValues.Lo2),
+ (AND8 DWShift2.Left, DWMaskValues.Hi2));
+}
+
+def DWShift4 {
+ dag Right = (RLDICL DWSwap2.Bits, 60, 4);
+ dag Left = (RLDICR DWSwap2.Bits, 4, 59);
+}
+
+def DWSwap4 {
+ dag Bits = (OR8 (AND8 DWShift4.Right, DWMaskValues.Lo4),
+ (AND8 DWShift4.Left, DWMaskValues.Hi4));
+}
+
+// Bit swap is done, now start byte swap.
+def DWExtractLo32 {
+ dag SubReg = (i32 (EXTRACT_SUBREG DWSwap4.Bits, sub_32));
+}
+
+def DWRotateLo32 {
+ dag Left24 = (RLWINM DWExtractLo32.SubReg, 24, 0, 31);
+}
+
+def DWLo32RotateInsertByte3 {
+ dag Left = (RLWIMI DWRotateLo32.Left24, DWExtractLo32.SubReg, 8, 8, 15);
+}
+
+// Lower 32 bits in the right order
+def DWLo32RotateInsertByte1 {
+ dag Left =
+ (RLWIMI DWLo32RotateInsertByte3.Left, DWExtractLo32.SubReg, 8, 24, 31);
+}
+
+def ExtendLo32 {
+ dag To64Bit =
+ (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ DWLo32RotateInsertByte1.Left, sub_32));
+}
+
+def DWShiftHi32 { // SRDI DWSwap4.Bits, 32
+ dag ToLo32 = (RLDICL DWSwap4.Bits, 32, 32);
+}
+
+def DWExtractHi32 {
+ dag SubReg = (i32 (EXTRACT_SUBREG DWShiftHi32.ToLo32, sub_32));
+}
+
+def DWRotateHi32 {
+ dag Left24 = (RLWINM DWExtractHi32.SubReg, 24, 0, 31);
+}
+
+def DWHi32RotateInsertByte3 {
+ dag Left = (RLWIMI DWRotateHi32.Left24, DWExtractHi32.SubReg, 8, 8, 15);
+}
+
+// High 32 bits in the right order, but in the low 32-bit position
+def DWHi32RotateInsertByte1 {
+ dag Left =
+ (RLWIMI DWHi32RotateInsertByte3.Left, DWExtractHi32.SubReg, 8, 24, 31);
+}
+
+def ExtendHi32 {
+ dag To64Bit =
+ (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ DWHi32RotateInsertByte1.Left, sub_32));
+}
+
+def DWShiftLo32 { // SLDI ExtendHi32.To64Bit, 32
+ dag ToHi32 = (RLDICR ExtendHi32.To64Bit, 32, 31);
+}
+
+def : Pat<(i64 (bitreverse i64:$A)),
+ (OR8 DWShiftLo32.ToHi32, ExtendLo32.To64Bit)>;
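Likewise, a reference C++ sketch of the 64-bit variant; it is a plain-C restatement of the algorithm (bit swaps on the full doubleword, then a full byte reverse built from two 32-bit byte reverses), not a transcription of the instruction selection above:

```cpp
#include <cstdint>

static uint32_t byteReverse32(uint32_t N) { // step 4 of the 32-bit sketch
  return (N << 24) | ((N << 8) & 0x00FF0000u) |
         ((N >> 8) & 0x0000FF00u) | (N >> 24);
}

uint64_t reverseBits64(uint64_t N) {
  N = ((N >> 1) & 0x5555555555555555ull) | ((N << 1) & 0xAAAAAAAAAAAAAAAAull);
  N = ((N >> 2) & 0x3333333333333333ull) | ((N << 2) & 0xCCCCCCCCCCCCCCCCull);
  N = ((N >> 4) & 0x0F0F0F0F0F0F0F0Full) | ((N << 4) & 0xF0F0F0F0F0F0F0F0ull);
  // Full byte reverse: each 32-bit half is byte-reversed and the two
  // halves exchange positions.
  uint32_t Lo = byteReverse32(static_cast<uint32_t>(N));
  uint32_t Hi = byteReverse32(static_cast<uint32_t>(N >> 32));
  return (static_cast<uint64_t>(Lo) << 32) | Hi;
}
```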
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 9cfc897cdb3f2..43635a8919e2b 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1901,6 +1901,98 @@ let Predicates = [IsLittleEndian, HasVSX] in
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+// Variable index unsigned vector_extract on Power9
+let Predicates = [HasP9Altivec, IsLittleEndian] in {
+ def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBRX $Idx, $S)>;
+
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHRX (LI8 0), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHRX (LI8 2), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHRX (LI8 4), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHRX (LI8 6), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHRX (LI8 8), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHRX (LI8 10), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHRX (LI8 12), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHRX (LI8 14), $S)>;
+
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWRX (LI8 0), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (VEXTUWRX (LI8 4), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (VEXTUWRX (LI8 8), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWRX (LI8 12), $S)>;
+
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWRX (LI8 0), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (VEXTUWRX (LI8 4), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (VEXTUWRX (LI8 8), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWRX (LI8 12), $S))>;
+}
+let Predicates = [HasP9Altivec, IsBigEndian] in {
+ def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBLX $Idx, $S)>;
+
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHLX (LI8 0), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHLX (LI8 2), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHLX (LI8 4), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHLX (LI8 6), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHLX (LI8 8), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHLX (LI8 10), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHLX (LI8 12), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHLX (LI8 14), $S)>;
+
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWLX (LI8 0), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (VEXTUWLX (LI8 4), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (VEXTUWLX (LI8 8), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWLX (LI8 12), $S)>;
+
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWLX (LI8 0), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (VEXTUWLX (LI8 4), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (VEXTUWLX (LI8 8), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWLX (LI8 12), $S))>;
+}
+
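The RLWINM8 operands in the variable-index patterns above just scale the element index into a byte offset within the vector register; a sketch of that arithmetic with the rotate-and-mask written as shift-and-mask:

```cpp
// RLWINM8 $Idx, 1, 28, 30 for v8i16: offset = (Idx << 1) & 0xE
unsigned halfwordByteOffset(unsigned Idx) { return (Idx << 1) & 0xE; }

// RLWINM8 $Idx, 2, 28, 29 for v4i32: offset = (Idx << 2) & 0xC
unsigned wordByteOffset(unsigned Idx) { return (Idx << 2) & 0xC; }

// e.g. halfwordByteOffset(3) == 6, matching the (LI8 6) operand in the
// constant-index pattern for (vector_extract v8i16:$S, 3) above.
```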
let Predicates = [IsLittleEndian, HasDirectMove] in {
// v16i8 scalar <-> vector conversions (LE)
def : Pat<(v16i8 (scalar_to_vector i32:$A)),
@@ -2729,36 +2821,54 @@ def DblToFlt {
}
def ByteToWord {
- dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
- dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
- dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
- dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+ dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
+ dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
+ dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
+ dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+ dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8));
+ dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8));
+ dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8));
+ dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8));
}
def ByteToDWord {
- dag A0 = (i64 (sext_inreg
- (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
- dag A1 = (i64 (sext_inreg
- (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+ dag LE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
+ dag LE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+ dag BE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8));
+ dag BE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8));
}
def HWordToWord {
- dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
- dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
- dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
- dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+ dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
+ dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
+ dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
+ dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+ dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16));
+ dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16));
+ dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16));
+ dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16));
}
def HWordToDWord {
- dag A0 = (i64 (sext_inreg
- (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
- dag A1 = (i64 (sext_inreg
- (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+ dag LE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
+ dag LE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+ dag BE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16));
+ dag BE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16));
}
def WordToDWord {
- dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
- dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+ dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
+ dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+ dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1))));
+ dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3))));
}
def FltToIntLoad {
@@ -3016,18 +3126,46 @@ let AddedComplexity = 400 in {
// P9 Altivec instructions that can be used to build vectors.
// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
// with complexities of existing build vector patterns in this file.
- let Predicates = [HasP9Altivec] in {
- def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)),
+ let Predicates = [HasP9Altivec, IsLittleEndian] in {
+ def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
(v2i64 (VEXTSW2D $A))>;
- def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)),
+ def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
(v2i64 (VEXTSH2D $A))>;
- def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1,
- HWordToWord.A2, HWordToWord.A3)),
+ def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
+ HWordToWord.LE_A2, HWordToWord.LE_A3)),
(v4i32 (VEXTSH2W $A))>;
- def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1,
- ByteToWord.A2, ByteToWord.A3)),
+ def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
+ ByteToWord.LE_A2, ByteToWord.LE_A3)),
(v4i32 (VEXTSB2W $A))>;
- def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)),
+ def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
(v2i64 (VEXTSB2D $A))>;
}
+
+ let Predicates = [HasP9Altivec, IsBigEndian] in {
+ def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+ def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+ def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
+ HWordToWord.BE_A2, HWordToWord.BE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+ def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
+ ByteToWord.BE_A2, ByteToWord.BE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+ def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+ }
+
+ let Predicates = [HasP9Altivec] in {
+    def : Pat<(v2i64 (PPCSExtVElems v16i8:$A)),
+              (v2i64 (VEXTSB2D $A))>;
+    def : Pat<(v2i64 (PPCSExtVElems v8i16:$A)),
+              (v2i64 (VEXTSH2D $A))>;
+    def : Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
+              (v2i64 (VEXTSW2D $A))>;
+    def : Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
+              (v4i32 (VEXTSB2W $A))>;
+    def : Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
+              (v4i32 (VEXTSH2W $A))>;
+ }
}
diff --git a/lib/Target/PowerPC/PPCScheduleP9.td b/lib/Target/PowerPC/PPCScheduleP9.td
index a9c1bd78b05e0..a01995a629c29 100644
--- a/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/lib/Target/PowerPC/PPCScheduleP9.td
@@ -260,8 +260,8 @@ let SchedModel = P9Model in {
// ***************** Defining Itinerary Class Resources *****************
- def : ItinRW<[P9_DFU_76C, IP_EXEC_1C, DISP_1C, DISP_1C], [IIC_IntSimple,
- IIC_IntGeneral]>;
+ def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ [IIC_IntSimple, IIC_IntGeneral]>;
def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
[IIC_IntISEL, IIC_IntRotate, IIC_IntShift]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 5a97f595ad8cf..90d11f46a384d 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -272,6 +272,13 @@ public:
return 16;
}
+
+  // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI (Non-DarwinABI) has no
+  // red zone and PPC64 SVR4ABI has a 288-byte red zone.
+ unsigned getRedZoneSize() const {
+ return isDarwinABI() ? 224 : (isPPC64() ? 288 : 0);
+ }
+
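A minimal sketch of the size rule stated in the comment, with plain booleans standing in for the subtarget queries:

```cpp
// Red-zone bytes per ABI: Darwin = 224, PPC64 SVR4 = 288, PPC32 SVR4 = 0.
unsigned redZoneSize(bool IsDarwinABI, bool IsPPC64) {
  if (IsDarwinABI)
    return 224;
  return IsPPC64 ? 288 : 0;
}
```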
bool hasHTM() const { return HasHTM; }
bool hasFusion() const { return HasFusion; }
bool hasFloat128() const { return HasFloat128; }
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 491eaf326a508..7d34efd4af3e0 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -195,8 +195,10 @@ public:
return false;
// If we don't have VSX on the subtarget, don't do anything.
+ // Also, on Power 9 the load and store ops preserve element order and so
+ // the swaps are not required.
const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
- if (!STI.hasVSX())
+ if (!STI.hasVSX() || !STI.needsSwapsForVSXMemOps())
return false;
bool Changed = false;
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index f85c0cf111c43..be83efc02d278 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -34,7 +34,7 @@ public:
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsResolved) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -73,7 +73,7 @@ bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
- bool IsPCRel) const {
+ bool IsResolved) const {
return;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index d4454c271f5ac..0d021d67033e5 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -211,6 +211,7 @@ namespace {
case Sparc::fixup_sparc_wplt30:
if (Target.getSymA()->getSymbol().isTemporary())
return false;
+ LLVM_FALLTHROUGH;
case Sparc::fixup_sparc_tls_gd_hi22:
case Sparc::fixup_sparc_tls_gd_lo10:
case Sparc::fixup_sparc_tls_gd_add:
@@ -275,7 +276,7 @@ namespace {
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override {
+ uint64_t Value, bool IsResolved) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 6b32a7926437a..51ac410a9c819 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -52,7 +52,7 @@ public:
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsResolved) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override {
return false;
}
@@ -94,7 +94,7 @@ void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm,
const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
- bool IsPCRel) const {
+ bool IsResolved) const {
MCFixupKind Kind = Fixup.getKind();
unsigned Offset = Fixup.getOffset();
unsigned BitSize = getFixupKindInfo(Kind).TargetSize;
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index fe4b52b515e0c..73a1036f88e0c 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -26,7 +26,7 @@
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
// This is the limit of processor resource usage at which the
// scheduler should try to look for other instructions (not using the
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index fef4a8c92a362..2801141cd951f 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2224,15 +2224,12 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
-// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
-// on the extended Op0 and (unextended) Op1. Store the even register result
+// and Opcode performs the GR128 operation. Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
- unsigned Extend, unsigned Opcode, SDValue Op0,
- SDValue Op1, SDValue &Even, SDValue &Odd) {
- SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
- SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
- SDValue(In128, 0), Op1);
+ unsigned Opcode, SDValue Op0, SDValue Op1,
+ SDValue &Even, SDValue &Odd) {
+ SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
bool Is32Bit = is32Bit(VT);
Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
@@ -2347,6 +2344,7 @@ static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Handle tests for order using (or (ogt y x) (oge x y)).
case ISD::SETUO:
Invert = true;
+ LLVM_FALLTHROUGH;
case ISD::SETO: {
assert(IsFP && "Unexpected integer comparison");
SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
@@ -2358,6 +2356,7 @@ static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Handle <> tests using (or (ogt y x) (ogt x y)).
case ISD::SETUEQ:
Invert = true;
+ LLVM_FALLTHROUGH;
case ISD::SETONE: {
assert(IsFP && "Unexpected integer comparison");
SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
@@ -2962,7 +2961,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
Op.getOperand(1), Ops[1], Ops[0]);
else {
- // Do a full 128-bit multiplication based on UMUL_LOHI64:
+ // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
//
// (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
//
@@ -2980,10 +2979,10 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
SDValue RL = Op.getOperand(1);
SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
- // UMUL_LOHI64 returns the low result in the odd register and the high
- // result in the even register. SMUL_LOHI is defined to return the
- // low half first, so the results are in reverse order.
- lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ // SystemZISD::UMUL_LOHI returns the low result in the odd register and
+ // the high result in the even register. ISD::SMUL_LOHI is defined to
+ // return the low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
LL, RL, Ops[1], Ops[0]);
SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
@@ -3004,10 +3003,10 @@ SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
Op.getOperand(1), Ops[1], Ops[0]);
else
- // UMUL_LOHI64 returns the low result in the odd register and the high
- // result in the even register. UMUL_LOHI is defined to return the
- // low half first, so the results are in reverse order.
- lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ // SystemZISD::UMUL_LOHI returns the low result in the odd register and
+ // the high result in the even register. ISD::UMUL_LOHI is defined to
+ // return the low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, DL);
}
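A hedged C++ illustration of the even/odd GR128 semantics discussed above, using unsigned __int128 (a GCC/Clang extension) as a stand-in for the register pair:

```cpp
#include <cstdint>

// SystemZISD::UMUL_LOHI: the even register receives the high half of the
// product and the odd register the low half; ISD::UMUL_LOHI is defined as
// (low, high), which is why the callers above swap Ops[1] and Ops[0].
void umulLoHi(uint64_t A, uint64_t B, uint64_t &Even, uint64_t &Odd) {
  unsigned __int128 P = static_cast<unsigned __int128>(A) * B;
  Even = static_cast<uint64_t>(P >> 64); // high half
  Odd = static_cast<uint64_t>(P);        // low half
}
```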
@@ -3018,24 +3017,19 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
SDValue Op1 = Op.getOperand(1);
EVT VT = Op.getValueType();
SDLoc DL(Op);
- unsigned Opcode;
- // We use DSGF for 32-bit division.
- if (is32Bit(VT)) {
+ // We use DSGF for 32-bit division. This means the first operand must
+ // always be 64-bit, and the second operand should be 32-bit whenever
+ // that is possible, to improve performance.
+ if (is32Bit(VT))
Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
- Opcode = SystemZISD::SDIVREM32;
- } else if (DAG.ComputeNumSignBits(Op1) > 32) {
+ else if (DAG.ComputeNumSignBits(Op1) > 32)
Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
- Opcode = SystemZISD::SDIVREM32;
- } else
- Opcode = SystemZISD::SDIVREM64;
- // DSG(F) takes a 64-bit dividend, so the even register in the GR128
- // input is "don't care". The instruction returns the remainder in
- // the even register and the quotient in the odd register.
+ // DSG(F) returns the remainder in the even register and the
+ // quotient in the odd register.
SDValue Ops[2];
- lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
- Op0, Op1, Ops[1], Ops[0]);
+ lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, DL);
}
@@ -3044,16 +3038,11 @@ SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
- // DL(G) uses a double-width dividend, so we need to clear the even
- // register in the GR128 input. The instruction returns the remainder
- // in the even register and the quotient in the odd register.
+ // DL(G) returns the remainder in the even register and the
+ // quotient in the odd register.
SDValue Ops[2];
- if (is32Bit(VT))
- lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
- Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
- else
- lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
- Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, DL);
}
@@ -3193,13 +3182,13 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SDLoc DL(Op);
AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
- SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+ SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
- FenceScope == CrossThread) {
+ FenceSSID == SyncScope::System) {
return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
Op.getOperand(0)),
0);
@@ -4669,11 +4658,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
OPCODE(POPCNT);
- OPCODE(UMUL_LOHI64);
- OPCODE(SDIVREM32);
- OPCODE(SDIVREM64);
- OPCODE(UDIVREM32);
- OPCODE(UDIVREM64);
+ OPCODE(UMUL_LOHI);
+ OPCODE(SDIVREM);
+ OPCODE(UDIVREM);
OPCODE(MVC);
OPCODE(MVC_LOOP);
OPCODE(NC);
@@ -5778,14 +5765,12 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
return DoneMBB;
}
-// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
+// Emit an extension from a GR64 to a GR128. ClearEven is true
// if the high register of the GR128 value must be cleared or false if
-// it's "don't care". SubReg is subreg_l32 when extending a GR32
-// and subreg_l64 when extending a GR64.
+// it's "don't care".
MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
MachineBasicBlock *MBB,
- bool ClearEven,
- unsigned SubReg) const {
+ bool ClearEven) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@@ -5808,7 +5793,7 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
In128 = NewIn128;
}
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
- .addReg(In128).addReg(Src).addImm(SubReg);
+ .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
MI.eraseFromParent();
return MBB;
@@ -6172,12 +6157,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::CondStoreF64Inv:
return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
- case SystemZ::AEXT128_64:
- return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
- case SystemZ::ZEXT128_32:
- return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
- case SystemZ::ZEXT128_64:
- return emitExt128(MI, MBB, true, SystemZ::subreg_l64);
+ case SystemZ::AEXT128:
+ return emitExt128(MI, MBB, false);
+ case SystemZ::ZEXT128:
+ return emitExt128(MI, MBB, true);
case SystemZ::ATOMIC_SWAPW:
return emitAtomicLoadBinary(MI, MBB, 0, 0);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 5dcb19c0a35db..6c9c404816f09 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -86,14 +86,11 @@ enum NodeType : unsigned {
// Count number of bits set in operand 0 per byte.
POPCNT,
- // Wrappers around the ISD opcodes of the same name. The output and
- // first input operands are GR128s. The trailing numbers are the
- // widths of the second operand in bits.
- UMUL_LOHI64,
- SDIVREM32,
- SDIVREM64,
- UDIVREM32,
- UDIVREM64,
+ // Wrappers around the ISD opcodes of the same name. The output is GR128.
+ // Input operands may be GR64 or GR32, depending on the instruction.
+ UMUL_LOHI,
+ SDIVREM,
+ UDIVREM,
// Use a series of MVCs to copy bytes from one memory location to another.
// The operands are:
@@ -562,7 +559,7 @@ private:
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const;
MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB,
- bool ClearEven, unsigned SubReg) const;
+ bool ClearEven) const;
MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI,
MachineBasicBlock *BB,
unsigned BinOpcode, unsigned BitSize,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 98f66c29ae647..4569be7602e45 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -677,6 +677,22 @@ let Predicates = [FeatureLoadAndTrap] in {
def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>;
}
+// Extend GR64s to GR128s.
+let usesCustomInserter = 1 in
+ def ZEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+
+//===----------------------------------------------------------------------===//
+// "Any" extensions
+//===----------------------------------------------------------------------===//
+
+// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
+
+// Extend GR64s to GR128s.
+let usesCustomInserter = 1 in
+ def AEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+
//===----------------------------------------------------------------------===//
// Truncations
//===----------------------------------------------------------------------===//
@@ -1216,13 +1232,17 @@ def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
// Multiplication of a register, producing two results.
def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>;
def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>;
-def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>;
+def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>;
+def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2),
+ (MLGR (AEXT128 GR64:$src1), GR64:$src2)>;
// Multiplication of memory, producing two results.
def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>;
def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>;
def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>;
-def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
+def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>;
+def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+ (MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
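
A hedged source-level example of code that should exercise the new multiply
patterns (the function name and the claimed selection are for illustration):

    // z_umul_lohi models the full 64x64->128 multiply; the register form now
    // expands to AEXT128 of the first operand followed by MLGR.
    unsigned long umulh(unsigned long a, unsigned long b) {
      return (unsigned long)(((unsigned __int128)a * b) >> 64);
    }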
//===----------------------------------------------------------------------===//
// Division and remainder
@@ -1230,19 +1250,38 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
let hasSideEffects = 1 in { // Do not speculatively execute.
// Division and remainder, from registers.
- def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>;
- def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>;
- def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
- def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
- def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
+ def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>;
+ def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>;
+ def DSGR : BinaryRRE<"dsgr", 0xB90D, null_frag, GR128, GR64>;
+ def DLR : BinaryRRE<"dlr", 0xB997, null_frag, GR128, GR32>;
+ def DLGR : BinaryRRE<"dlgr", 0xB987, null_frag, GR128, GR64>;
// Division and remainder, from memory.
- def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>;
- def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
- def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
- def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
- def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
-}
+ def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>;
+ def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, load, 4>;
+ def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, load, 8>;
+ def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, load, 4>;
+ def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, load, 8>;
+}
+def : Pat<(z_sdivrem GR64:$src1, GR32:$src2),
+ (DSGFR (AEXT128 GR64:$src1), GR32:$src2)>;
+def : Pat<(z_sdivrem GR64:$src1, (i32 (load bdxaddr20only:$src2))),
+ (DSGF (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
+def : Pat<(z_sdivrem GR64:$src1, GR64:$src2),
+ (DSGR (AEXT128 GR64:$src1), GR64:$src2)>;
+def : Pat<(z_sdivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+ (DSG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
+
+def : Pat<(z_udivrem GR32:$src1, GR32:$src2),
+ (DLR (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1,
+ subreg_l32)), GR32:$src2)>;
+def : Pat<(z_udivrem GR32:$src1, (i32 (load bdxaddr20only:$src2))),
+ (DL (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1,
+ subreg_l32)), bdxaddr20only:$src2)>;
+def : Pat<(z_udivrem GR64:$src1, GR64:$src2),
+ (DLGR (ZEXT128 GR64:$src1), GR64:$src2)>;
+def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+ (DLG (ZEXT128 GR64:$src1), bdxaddr20only:$src2)>;
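
Likewise, a hedged example of the divide/remainder pairs these patterns
serve; when both results are needed, a single z_udivrem node should be formed
(illustrative, not from the patch):

    // One DLGR computes both results: remainder in the even register,
    // quotient in the odd register of the GR128 pair.
    void udivrem64(unsigned long a, unsigned long b,
                   unsigned long *q, unsigned long *r) {
      *q = a / b;
      *r = a % b;
    }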
//===----------------------------------------------------------------------===//
// Shifts
@@ -1894,17 +1933,6 @@ def : Pat<(ctlz GR64:$src),
let Predicates = [FeaturePopulationCount], Defs = [CC] in
def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;
-// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
-def : Pat<(i64 (anyext GR32:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
-
-// Extend GR32s and GR64s to GR128s.
-let usesCustomInserter = 1 in {
- def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
- def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>;
- def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
-}
-
// Search a block of memory for a character.
let mayLoad = 1, Defs = [CC] in
defm SRST : StringRRE<"srst", 0xB25E, z_search_string>;
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
index 3a0e01da42f03..d4cd89ce590fc 100644
--- a/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -127,7 +127,7 @@ MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I,
return Copy;
}
-// Create a virtal register in *TLSBaseAddrReg, and populate it by
+// Create a virtual register in *TLSBaseAddrReg, and populate it by
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I,
unsigned *TLSBaseAddrReg) {
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index b6feaa49d8585..8342463c1086f 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -18,7 +18,7 @@
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
#ifndef NDEBUG
// Print the set of SUs
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index ab2392809f3be..9c6d5819f8a7e 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -36,14 +36,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
SDTCisSameAs<0, 2>,
SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
-def SDT_ZGR128Binary32 : SDTypeProfile<1, 2,
+def SDT_ZGR128Binary : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
- SDTCisVT<1, untyped>,
- SDTCisVT<2, i32>]>;
-def SDT_ZGR128Binary64 : SDTypeProfile<1, 2,
- [SDTCisVT<0, untyped>,
- SDTCisVT<1, untyped>,
- SDTCisVT<2, i64>]>;
+ SDTCisInt<1>,
+ SDTCisInt<2>]>;
def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
@@ -185,11 +181,9 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
[SDNPInGlue]>;
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
-def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
-def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
-def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
-def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
-def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
+def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;
+def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>;
+def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>;
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index adc9f2976f871..72543c1eaee2e 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -15,7 +15,7 @@
def Z13Model : SchedMachineModel {
let UnsupportedFeatures = Arch11UnsupportedFeatures.List;
-
+
let IssueWidth = 8;
let MicroOpBufferSize = 60; // Issue queues
let LoadLatency = 1; // Optimistic load latency.
@@ -159,7 +159,7 @@ def : InstRW<[FXb], (instregex "CondReturn$")>;
// Select instructions
//===----------------------------------------------------------------------===//
-// Select pseudo
+// Select pseudo
def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>;
// CondStore pseudos
@@ -226,7 +226,7 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>;
def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
-def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>;
def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
@@ -282,7 +282,7 @@ def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
(instregex "LM(H|Y|G)?$")>;
// Load multiple disjoint
-def : InstRW<[FXb, Lat30, GroupAlone], (instregex "LMD$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>;
// Store multiple (estimated average of ceil(5/2) FXb ops)
def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10,
@@ -446,13 +446,13 @@ def : InstRW<[FXa, Lat6], (instregex "MS(R|FI)$")>;
def : InstRW<[FXa, LSU, Lat12], (instregex "MSG$")>;
def : InstRW<[FXa, Lat8], (instregex "MSGR$")>;
def : InstRW<[FXa, Lat6], (instregex "MSGF(I|R)$")>;
-def : InstRW<[FXa, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
-def : InstRW<[FXa, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXa2, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXa2, Lat9, GroupAlone], (instregex "MLGR$")>;
def : InstRW<[FXa, Lat5], (instregex "MGHI$")>;
def : InstRW<[FXa, Lat5], (instregex "MHI$")>;
def : InstRW<[FXa, LSU, Lat9], (instregex "MH(Y)?$")>;
-def : InstRW<[FXa, Lat7, GroupAlone], (instregex "M(L)?R$")>;
-def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
+def : InstRW<[FXa2, Lat7, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[FXa2, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
//===----------------------------------------------------------------------===//
// Division and remainder
@@ -460,8 +460,8 @@ def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DR$")>;
def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "D$")>;
-def : InstRW<[FXa, Lat30, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[LSU, FXa, Lat30, GroupAlone], (instregex "DSG(F)?$")>;
+def : InstRW<[FXa2, Lat30, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[LSU, FXa2, Lat30, GroupAlone], (instregex "DSG(F)?$")>;
def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>;
def : InstRW<[FXa2, FXa2, Lat30, GroupAlone], (instregex "DLGR$")>;
def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>;
@@ -474,7 +474,8 @@ def : InstRW<[FXa], (instregex "SLL(G|K)?$")>;
def : InstRW<[FXa], (instregex "SRL(G|K)?$")>;
def : InstRW<[FXa], (instregex "SRA(G|K)?$")>;
def : InstRW<[FXa], (instregex "SLA(G|K)?$")>;
-def : InstRW<[FXa, FXa, FXa, FXa, Lat8], (instregex "S(L|R)D(A|L)$")>;
+def : InstRW<[FXa, FXa, FXa, FXa, LSU, Lat8, GroupAlone],
+ (instregex "S(L|R)D(A|L)$")>;
// Rotate
def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>;
@@ -537,7 +538,7 @@ def : InstRW<[FXb], (instregex "TMLH(64)?$")>;
def : InstRW<[FXb], (instregex "TMLL(64)?$")>;
// Compare logical characters under mask
-def : InstRW<[FXb, LSU, Lat5], (instregex "CLM(H|Y)?$")>;
+def : InstRW<[FXb, LSU, Lat6], (instregex "CLM(H|Y)?$")>;
//===----------------------------------------------------------------------===//
// Prefetch and execution hint
@@ -573,7 +574,7 @@ def : InstRW<[FXa, FXa, FXb, FXb, LSU, FXb, FXb, LSU, LSU, Lat20, GroupAlone],
(instregex "CDSG$")>;
// Compare and swap and store
-def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CSST$")>;
+def : InstRW<[FXa, LSU, Lat30], (instregex "CSST$")>;
// Perform locked operation
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
@@ -589,36 +590,45 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
// Translate and convert
//===----------------------------------------------------------------------===//
-def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>;
-def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>;
-def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
-def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>;
+def : InstRW<[FXa, FXa, FXa, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>;
+def : InstRW<[FXa, LSU, Lat30], (instregex "TRTR$")>;
+def : InstRW<[FXa, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[FXa, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[FXa, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
//===----------------------------------------------------------------------===//
// Message-security assist
//===----------------------------------------------------------------------===//
-def : InstRW<[FXa, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>;
-def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>;
+def : InstRW<[FXa, Lat30], (instregex "KM(C|F|O|CTR)?$")>;
+def : InstRW<[FXa, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>;
//===----------------------------------------------------------------------===//
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>;
-def : InstRW<[FXb, VecDF, FXb, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>;
+def : InstRW<[FXb, VecDF, VecDF, LSU, LSU, Lat30, GroupAlone],
+ (instregex "CVBG$")>;
+def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>;
+def : InstRW<[FXb, FXb, FXb, VecDF2, VecDF2, LSU, Lat30, GroupAlone],
+ (instregex "CVDG$")>;
+def : InstRW<[FXb, VecDF, FXb, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>;
+def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>;
+def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "UNPK$")>;
-def : InstRW<[FXb, VecDFX, LSU, LSU, Lat9, GroupAlone],
+def : InstRW<[FXb, VecDFX, LSU, LSU, LSU, Lat9, GroupAlone],
(instregex "(A|S|ZA)P$")>;
-def : InstRW<[FXb, VecDFX2, LSU, LSU, Lat30, GroupAlone],
+def : InstRW<[FXb, VecDFX2, VecDFX2, LSU, LSU, LSU, Lat30, GroupAlone],
(instregex "(M|D)P$")>;
-def : InstRW<[FXb, FXb, VecDFX2, LSU, LSU, LSU, Lat15, GroupAlone],
+def : InstRW<[FXb, VecDFX, VecDFX, LSU, LSU, Lat15, GroupAlone],
(instregex "SRP$")>;
def : InstRW<[VecDFX, LSU, LSU, Lat5, GroupAlone], (instregex "CP$")>;
-def : InstRW<[VecDFX, LSU, Lat4, GroupAlone], (instregex "TP$")>;
+def : InstRW<[VecDFX, LSU, Lat4, BeginGroup], (instregex "TP$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>;
//===----------------------------------------------------------------------===//
@@ -688,25 +698,25 @@ def : InstRW<[FXb], (instregex "PPA$")>;
//===----------------------------------------------------------------------===//
// Find leftmost one
-def : InstRW<[FXa, Lat6, GroupAlone], (instregex "FLOGR$")>;
+def : InstRW<[FXa, FXa, Lat6, GroupAlone], (instregex "FLOGR$")>;
// Population count
def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>;
// Extend
-def : InstRW<[FXa], (instregex "AEXT128_64$")>;
-def : InstRW<[FXa], (instregex "ZEXT128_(32|64)$")>;
+def : InstRW<[FXa], (instregex "AEXT128$")>;
+def : InstRW<[FXa], (instregex "ZEXT128$")>;
// String instructions
def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>;
-def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>;
+def : InstRW<[FXa, Lat30], (instregex "SRSTU$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>;
// Various complex instructions
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
+def : InstRW<[LSU, Lat30], (instregex "CFC$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "UPT$")>;
+def : InstRW<[LSU, Lat30], (instregex "CKSM$")>;
+def : InstRW<[FXa, Lat30], (instregex "CMPSC$")>;
// Execute
def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>;
@@ -833,7 +843,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXBR(A)?$")>;
// Addition
def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D)B$")>;
def : InstRW<[VecBF], (instregex "A(E|D)BR$")>;
-def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXBR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXBR$")>;
// Subtraction
def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D)B$")>;
@@ -848,9 +858,9 @@ def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDBR$")>;
def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXBR$")>;
// Multiply and add / subtract
-def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)EBR$")>;
-def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
def : InstRW<[VecBF], (instregex "M(A|S)DBR$")>;
// Division
@@ -859,7 +869,7 @@ def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>;
def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>;
// Divide to integer
-def : InstRW<[VecFPd, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>;
+def : InstRW<[VecFPd, Lat30], (instregex "DI(E|D)BR$")>;
//===----------------------------------------------------------------------===//
// FP: Comparisons
@@ -882,8 +892,8 @@ def : InstRW<[FXa, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
def : InstRW<[FXb, LSU, Lat5, GroupAlone], (instregex "STFPC$")>;
def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
-def : InstRW<[FXa, Lat30, GroupAlone], (instregex "SFASR$")>;
-def : InstRW<[FXa, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
+def : InstRW<[FXa, Lat30], (instregex "SFASR$")>;
+def : InstRW<[FXa, LSU, Lat30], (instregex "LFAS$")>;
def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>;
@@ -904,7 +914,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>;
// Load rounded
def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>;
def : InstRW<[VecBF], (instregex "LEXR$")>;
-def : InstRW<[VecDF2, VecDF2], (instregex "(LDXR|LRDR)$")>;
+def : InstRW<[VecDF2], (instregex "(LDXR|LRDR)$")>;
// Load lengthened
def : InstRW<[LSU], (instregex "LDE$")>;
@@ -955,7 +965,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>;
// Addition
def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>;
-def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXR$")>;
// Subtraction
def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
@@ -968,16 +978,20 @@ def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>;
def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>;
def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>;
def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>;
-def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)?$")>;
-def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MY(H|L)?R$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "MY(H|L)R$")>;
// Multiply and add / subtract
-def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>;
-def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
-def : InstRW<[VecBF], (instregex "M(A|S)DR$")>;
-def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
-def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)DR$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "MAY(H|L)R$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAYR$")>;
// Division
def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>;
@@ -989,8 +1003,8 @@ def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>;
//===----------------------------------------------------------------------===//
// Compare
-def : InstRW<[VecXsPm, LSU, Lat8], (instregex "C(E|D)$")>;
-def : InstRW<[VecXsPm, Lat4], (instregex "C(E|D)R$")>;
+def : InstRW<[VecBF, LSU, Lat12], (instregex "C(E|D)$")>;
+def : InstRW<[VecBF], (instregex "C(E|D)R$")>;
def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>;
@@ -1032,7 +1046,7 @@ def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>;
def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>;
def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>;
def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>;
-def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, BeginGroup], (instregex "C(S|U)XTR$")>;
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>;
@@ -1047,7 +1061,7 @@ def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>;
def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>;
// Perform floating-point operation
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+def : InstRW<[FXb, Lat30], (instregex "PFPO$")>;
//===----------------------------------------------------------------------===//
// DFP: Unary arithmetic
@@ -1071,7 +1085,7 @@ def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>;
// Addition
def : InstRW<[VecDF], (instregex "ADTR(A)?$")>;
-def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXTR(A)?$")>;
// Subtraction
def : InstRW<[VecDF], (instregex "SDTR(A)?$")>;
@@ -1090,15 +1104,15 @@ def : InstRW<[VecDF], (instregex "QADTR$")>;
def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>;
// Reround
-def : InstRW<[FXb, VecDF, Lat11], (instregex "RRDTR$")>;
+def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "RRDTR$")>;
def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>;
// Shift significand left/right
-def : InstRW<[LSU, VecDF, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, VecDF, Lat11, GroupAlone], (instregex "S(L|R)DT$")>;
def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
-def : InstRW<[FXb, VecDF, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "IEDTR$")>;
def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
@@ -1115,7 +1129,7 @@ def : InstRW<[VecDF], (instregex "CEXTR$")>;
// Test Data Class/Group
def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>;
-def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+def : InstRW<[LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
// --------------------------------- Vector --------------------------------- //
@@ -1271,32 +1285,43 @@ def : InstRW<[VecStr, Lat5], (instregex "VTM$")>;
// Vector: Floating-point arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[VecBF2], (instregex "VCD(G|GB|LG|LGB)$")>;
-def : InstRW<[VecBF], (instregex "WCD(GB|LGB)$")>;
+// Conversion and rounding
+def : InstRW<[VecBF2], (instregex "VCD(L)?G$")>;
+def : InstRW<[VecBF2], (instregex "VCD(L)?GB$")>;
+def : InstRW<[VecBF], (instregex "WCD(L)?GB$")>;
def : InstRW<[VecBF2], (instregex "VC(L)?GD$")>;
-def : InstRW<[VecBF2], (instregex "VFADB$")>;
-def : InstRW<[VecBF], (instregex "WFADB$")>;
-def : InstRW<[VecBF2], (instregex "VCGDB$")>;
-def : InstRW<[VecBF], (instregex "WCGDB$")>;
-def : InstRW<[VecBF2], (instregex "VF(I|M|A|S)$")>;
-def : InstRW<[VecBF2], (instregex "VF(I|M|S)DB$")>;
-def : InstRW<[VecBF], (instregex "WF(I|M|S)DB$")>;
-def : InstRW<[VecBF2], (instregex "VCLGDB$")>;
-def : InstRW<[VecBF], (instregex "WCLGDB$")>;
-def : InstRW<[VecXsPm], (instregex "VFL(C|N|P)DB$")>;
-def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)DB$")>;
-def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>;
-def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>;
-def : InstRW<[VecBF], (instregex "WFM(A|S)DB$")>;
-def : InstRW<[VecXsPm], (instregex "VFPSO$")>;
-def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>;
-def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI(DB)?$")>;
-def : InstRW<[VecXsPm, Lat4], (instregex "WFTCIDB$")>;
+def : InstRW<[VecBF2], (instregex "VC(L)?GDB$")>;
+def : InstRW<[VecBF], (instregex "WC(L)?GDB$")>;
def : InstRW<[VecBF2], (instregex "VL(DE|ED)$")>;
def : InstRW<[VecBF2], (instregex "VL(DE|ED)B$")>;
def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>;
+def : InstRW<[VecBF2], (instregex "VFI$")>;
+def : InstRW<[VecBF2], (instregex "VFIDB$")>;
+def : InstRW<[VecBF], (instregex "WFIDB$")>;
+
+// Sign operations
+def : InstRW<[VecXsPm], (instregex "VFPSO$")>;
+def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>;
+def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>;
-// divide / square root
+// Test data class
+def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>;
+
+// Add / subtract
+def : InstRW<[VecBF2], (instregex "VF(A|S)$")>;
+def : InstRW<[VecBF2], (instregex "VF(A|S)DB$")>;
+def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>;
+
+// Multiply / multiply-and-add/subtract
+def : InstRW<[VecBF2], (instregex "VFM$")>;
+def : InstRW<[VecBF2], (instregex "VFMDB$")>;
+def : InstRW<[VecBF], (instregex "WFMDB$")>;
+def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>;
+def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>;
+def : InstRW<[VecBF], (instregex "WFM(A|S)DB$")>;
+
+// Divide / square root
def : InstRW<[VecFPd], (instregex "VFD$")>;
def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>;
def : InstRW<[VecFPd], (instregex "VFSQ$")>;
@@ -1308,10 +1333,10 @@ def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>;
def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)$")>;
def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)DB$")>;
-def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>;
def : InstRW<[VecXsPm], (instregex "WFC(E|H|HE)DB$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "VFC(E|H|HE)DBS$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "WFC(E|H|HE)DBS$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>;
//===----------------------------------------------------------------------===//
@@ -1351,12 +1376,12 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>;
def : InstRW<[FXb, Lat30], (instregex "EPSW$")>;
def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>;
-def : InstRW<[FXa, Lat3], (instregex "IPK$")>;
-def : InstRW<[LSU], (instregex "SPKA$")>;
-def : InstRW<[LSU], (instregex "SSM$")>;
-def : InstRW<[FXb], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXa, Lat3, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
def : InstRW<[FXa, Lat3], (instregex "IAC$")>;
-def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>;
//===----------------------------------------------------------------------===//
// System: Control Register Instructions
@@ -1411,14 +1436,14 @@ def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>;
def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>;
-def : InstRW<[FXb, LSU, Lat30], (instregex "MVPG$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>;
//===----------------------------------------------------------------------===//
// System: Address-Space Instructions
//===----------------------------------------------------------------------===//
def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>;
-def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>;
def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>;
def : InstRW<[FXb, Lat30], (instregex "PR$")>;
def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>;
@@ -1430,7 +1455,7 @@ def : InstRW<[FXb, Lat20], (instregex "TAR$")>;
// System: Linkage-Stack Instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[FXb, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXb, Lat30, EndGroup], (instregex "BAKR$")>;
def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>;
def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>;
@@ -1442,13 +1467,13 @@ def : InstRW<[FXb, Lat30], (instregex "PTFF$")>;
def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>;
def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>;
def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>;
-def : InstRW<[LSU, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[LSU, LSU, GroupAlone], (instregex "SPT$")>;
def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone],
(instregex "STCK(F)?$")>;
def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone],
(instregex "STCKE$")>;
def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>;
-def : InstRW<[LSU, LSU, FXb, Lat3], (instregex "STPT$")>;
+def : InstRW<[LSU, LSU, FXb, Lat5, BeginGroup], (instregex "STPT$")>;
//===----------------------------------------------------------------------===//
// System: CPU-Related Instructions
@@ -1459,7 +1484,7 @@ def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>;
def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>;
def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>;
def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>;
-def : InstRW<[FXb, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXa, LSU, Lat30], (instregex "ECTG$")>;
def : InstRW<[FXb, Lat30], (instregex "PTF$")>;
def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>;
@@ -1468,7 +1493,7 @@ def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[FXb, Lat30], (instregex "SVC$")>;
-def : InstRW<[FXb], (instregex "MC$")>;
+def : InstRW<[FXb, GroupAlone], (instregex "MC$")>;
def : InstRW<[FXb, Lat30], (instregex "DIAG$")>;
def : InstRW<[FXb], (instregex "TRAC(E|G)$")>;
def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>;
@@ -1483,7 +1508,8 @@ def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>;
def : InstRW<[FXb], (instregex "LPP$")>;
def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>;
def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>;
-def : InstRW<[FXb, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXb, Lat30], (instregex "LCCTL$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "L(P|S)CTL$")>;
def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>;
diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td
index 128049a090863..e3e1999d8ad8d 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -627,8 +627,8 @@ def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
// Extend
-def : InstRW<[FXU], (instregex "AEXT128_64$")>;
-def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+def : InstRW<[FXU], (instregex "AEXT128$")>;
+def : InstRW<[FXU], (instregex "ZEXT128$")>;
// String instructions
def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 76b378454631e..59f37205f4127 100644
--- a/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -665,8 +665,8 @@ def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
// Extend
-def : InstRW<[FXU], (instregex "AEXT128_64$")>;
-def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+def : InstRW<[FXU], (instregex "AEXT128$")>;
+def : InstRW<[FXU], (instregex "ZEXT128$")>;
// String instructions
def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index ce5c57e0f519b..9ac768b2189d7 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -779,15 +779,14 @@ int SystemZTTIImpl::
getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
// vlvgp will insert two grs into a vector register, so only count half the
// number of instructions.
- if (Opcode == Instruction::InsertElement &&
- Val->getScalarType()->isIntegerTy(64))
+ if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64))
return ((Index % 2 == 0) ? 1 : 0);
if (Opcode == Instruction::ExtractElement) {
int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
// Give a slight penalty for moving out of vector pipeline to FXU unit.
- if (Index == 0 && Val->getScalarType()->isIntegerTy())
+ if (Index == 0 && Val->isIntOrIntVectorTy())
Cost += 1;
return Cost;
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index ad59f2f405879..00bf02469bdd9 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -115,8 +115,8 @@ void WebAssemblyTargetAsmStreamer::emitStackPointer(uint32_t Index) {
void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType(
- StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
- OS << "\t.functype\t" << name;
+ MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
+ OS << "\t.functype\t" << Symbol->getName();
if (Results.empty())
OS << ", void";
else {
@@ -171,7 +171,7 @@ void WebAssemblyTargetELFStreamer::emitIndIdx(const MCExpr *Value) {
}
void WebAssemblyTargetELFStreamer::emitIndirectFunctionType(
- StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
+ MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
// Nothing to emit here. TODO: Re-design how linking works and re-evaluate
// whether it's necessary for .o files to declare indirect function types.
}
@@ -255,9 +255,25 @@ void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) {
}
void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType(
- StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
- // Nothing to emit here. TODO: Re-design how linking works and re-evaluate
- // whether it's necessary for .o files to declare indirect function types.
+ MCSymbol *Symbol, SmallVectorImpl<MVT> &Params,
+ SmallVectorImpl<MVT> &Results) {
+ MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Symbol);
+ if (WasmSym->isFunction()) {
+ // Symbol already has its arguments and result set.
+ return;
+ }
+
+ SmallVector<wasm::ValType, 4> ValParams;
+ for (MVT Ty : Params)
+ ValParams.push_back(WebAssembly::toValType(Ty));
+
+ SmallVector<wasm::ValType, 1> ValResults;
+ for (MVT Ty : Results)
+ ValResults.push_back(WebAssembly::toValType(Ty));
+
+ WasmSym->setParams(std::move(ValParams));
+ WasmSym->setReturns(std::move(ValResults));
+ WasmSym->setIsFunction(true);
}
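
A hedged usage sketch of the new wasm-streamer behavior for a declaration
shaped like "i32 f(i32)"; Streamer and Sym are assumed names:

    SmallVector<MVT, 4> Params = {MVT::i32};
    SmallVector<MVT, 4> Results = {MVT::i32};
    Streamer.emitIndirectFunctionType(Sym, Params, Results);
    // Sym, an MCSymbolWasm, now carries params {i32}, returns {i32}, and
    // isFunction() == true, instead of the type living only in the .s text.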
void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) {
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 5ad147e5e5960..102d7219a1e74 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -44,7 +44,7 @@ public:
/// .endfunc
virtual void emitEndFunc() = 0;
/// .functype
- virtual void emitIndirectFunctionType(StringRef name,
+ virtual void emitIndirectFunctionType(MCSymbol *Symbol,
SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results) = 0;
/// .indidx
@@ -69,7 +69,7 @@ public:
void emitGlobal(ArrayRef<wasm::Global> Globals) override;
void emitStackPointer(uint32_t Index) override;
void emitEndFunc() override;
- void emitIndirectFunctionType(StringRef name,
+ void emitIndirectFunctionType(MCSymbol *Symbol,
SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results) override;
void emitIndIdx(const MCExpr *Value) override;
@@ -87,7 +87,7 @@ public:
void emitGlobal(ArrayRef<wasm::Global> Globals) override;
void emitStackPointer(uint32_t Index) override;
void emitEndFunc() override;
- void emitIndirectFunctionType(StringRef name,
+ void emitIndirectFunctionType(MCSymbol *Symbol,
SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results) override;
void emitIndIdx(const MCExpr *Value) override;
@@ -105,7 +105,7 @@ public:
void emitGlobal(ArrayRef<wasm::Global> Globals) override;
void emitStackPointer(uint32_t Index) override;
void emitEndFunc() override;
- void emitIndirectFunctionType(StringRef name,
+ void emitIndirectFunctionType(MCSymbol *Symbol,
SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results) override;
void emitIndIdx(const MCExpr *Value) override;
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index f51585a10ca12..211358ad66cd5 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -84,7 +84,7 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
SmallVector<MVT, 4> Results;
SmallVector<MVT, 4> Params;
ComputeSignatureVTs(F, TM, Params, Results);
- getTargetStreamer()->emitIndirectFunctionType(F.getName(), Params,
+ getTargetStreamer()->emitIndirectFunctionType(getSymbol(&F), Params,
Results);
}
}
@@ -214,11 +214,8 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
if (GV->getValueType()->isFunctionTy()) {
- MCSymbol* Sym = getSymbol(GV);
- if (!isa<MCSymbolELF>(Sym))
- cast<MCSymbolWasm>(Sym)->setIsFunction(true);
return MCSymbolRefExpr::create(
- Sym, MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext);
+ getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext);
}
return AsmPrinter::lowerConstant(CV);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index 1691808d05a0f..700111743ee8e 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -132,7 +132,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
// no blocks not dominated by the loop header.
// - It's desirable to preserve the original block order when possible.
// We use two ready lists; Preferred and Ready. Preferred has recently
- // processed sucessors, to help preserve block sequences from the original
+ // processed successors, to help preserve block sequences from the original
// order. Ready has the remaining ready blocks.
PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
CompareBlockNumbers>
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index ff186eb915039..8880539804cae 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -112,8 +112,6 @@ MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
MCSymbolRefExpr::VariantKind VK =
IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
: MCSymbolRefExpr::VK_None;
- if (!isa<MCSymbolELF>(Sym))
- cast<MCSymbolWasm>(Sym)->setIsFunction(IsFunc);
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index c02ef4a1c399b..2599064334ee8 100644
--- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -394,11 +394,22 @@ RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = {
/* MEMMOVE */ iPTR_func_iPTR_iPTR_iPTR,
// ELEMENT-WISE ATOMIC MEMORY
-/* MEMCPY_ELEMENT_ATOMIC_1 */ iPTR_func_iPTR_iPTR_iPTR,
-/* MEMCPY_ELEMENT_ATOMIC_2 */ iPTR_func_iPTR_iPTR_iPTR,
-/* MEMCPY_ELEMENT_ATOMIC_4 */ iPTR_func_iPTR_iPTR_iPTR,
-/* MEMCPY_ELEMENT_ATOMIC_8 */ iPTR_func_iPTR_iPTR_iPTR,
-/* MEMCPY_ELEMENT_ATOMIC_16 */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported,
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported,
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported,
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
+
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
// EXCEPTION HANDLING
/* UNWIND_RESUME */ unsupported,
@@ -839,11 +850,21 @@ RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = {
/* MEMCPY */ "memcpy",
/* MEMMOVE */ "memset",
/* MEMSET */ "memmove",
-/* MEMCPY_ELEMENT_ATOMIC_1 */ "MEMCPY_ELEMENT_ATOMIC_1",
-/* MEMCPY_ELEMENT_ATOMIC_2 */ "MEMCPY_ELEMENT_ATOMIC_2",
-/* MEMCPY_ELEMENT_ATOMIC_4 */ "MEMCPY_ELEMENT_ATOMIC_4",
-/* MEMCPY_ELEMENT_ATOMIC_8 */ "MEMCPY_ELEMENT_ATOMIC_8",
-/* MEMCPY_ELEMENT_ATOMIC_16 */ "MEMCPY_ELEMENT_ATOMIC_16",
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr,
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr,
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
+/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
+/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
/* UNWIND_RESUME */ "_Unwind_Resume",
/* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1",
/* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2",
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 825f23dc52d9b..c1d216c8b7af8 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2453,8 +2453,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
break;
}
- // In MS inline asm curly braces mark the begining/end of a block, therefore
- // they should be interepreted as end of statement
+ // In MS inline asm curly braces mark the beginning/end of a block,
+ // therefore they should be interpreted as end of statement
CurlyAsEndOfStatement =
isParsingIntelSyntax() && isParsingInlineAsm() &&
(getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 5e809c34325ee..f5f3a4cc83dc9 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -1038,7 +1038,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::EXTRQI:
if (MI->getOperand(2).isImm() &&
MI->getOperand(3).isImm())
- DecodeEXTRQIMask(MI->getOperand(2).getImm(),
+ DecodeEXTRQIMask(MVT::v16i8, MI->getOperand(2).getImm(),
MI->getOperand(3).getImm(),
ShuffleMask);
@@ -1049,7 +1049,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::INSERTQI:
if (MI->getOperand(3).isImm() &&
MI->getOperand(4).isImm())
- DecodeINSERTQIMask(MI->getOperand(3).getImm(),
+ DecodeINSERTQIMask(MVT::v16i8, MI->getOperand(3).getImm(),
MI->getOperand(4).getImm(),
ShuffleMask);
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 914fb36f91a7d..733eac7c03212 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -110,7 +110,7 @@ public:
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override {
+ uint64_t Value, bool IsResolved) const override {
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 1be5aec849fc6..8a0fbfb45b22d 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -452,15 +452,20 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
-void DecodeEXTRQIMask(int Len, int Idx,
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask) {
+ assert(VT.is128BitVector() && "Expected 128-bit vector");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned HalfElts = NumElts / 2;
+
// Only the bottom 6 bits are valid for each immediate.
Len &= 0x3F;
Idx &= 0x3F;
// We can only decode this bit extraction instruction as a shuffle if both the
- // length and index work with whole bytes.
- if (0 != (Len % 8) || 0 != (Idx % 8))
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
return;
// A length of zero is equivalent to a bit length of 64.
@@ -469,33 +474,38 @@ void DecodeEXTRQIMask(int Len, int Idx,
// If the length + index exceeds the bottom 64 bits the result is undefined.
if ((Len + Idx) > 64) {
- ShuffleMask.append(16, SM_SentinelUndef);
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
return;
}
- // Convert index and index to work with bytes.
- Len /= 8;
- Idx /= 8;
+ // Convert length and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
- // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
- // of the lower 64-bits. The upper 64-bits are undefined.
+ // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
+ // elements of the lower 64-bits. The upper 64-bits are undefined.
for (int i = 0; i != Len; ++i)
ShuffleMask.push_back(i + Idx);
- for (int i = Len; i != 8; ++i)
+ for (int i = Len; i != (int)HalfElts; ++i)
ShuffleMask.push_back(SM_SentinelZero);
- for (int i = 8; i != 16; ++i)
+ for (int i = HalfElts; i != (int)NumElts; ++i)
ShuffleMask.push_back(SM_SentinelUndef);
}
-void DecodeINSERTQIMask(int Len, int Idx,
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask) {
+ assert(VT.is128BitVector() && "Expected 128-bit vector");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned HalfElts = NumElts / 2;
+
// Only the bottom 6 bits are valid for each immediate.
Len &= 0x3F;
Idx &= 0x3F;
// We can only decode this bit insertion instruction as a shuffle if both the
- // length and index work with whole bytes.
- if (0 != (Len % 8) || 0 != (Idx % 8))
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
return;
// A length of zero is equivalent to a bit length of 64.
@@ -504,24 +514,24 @@ void DecodeINSERTQIMask(int Len, int Idx,
// If the length + index exceeds the bottom 64 bits the result is undefined.
if ((Len + Idx) > 64) {
- ShuffleMask.append(16, SM_SentinelUndef);
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
return;
}
- // Convert index and index to work with bytes.
- Len /= 8;
- Idx /= 8;
+ // Convert length and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
- // INSERTQ: Extract lowest Len bytes from lower half of second source and
- // insert over first source starting at Idx byte. The upper 64-bits are
+ // INSERTQ: Extract lowest Len elements from lower half of second source and
+ // insert over first source starting at Idx element. The upper 64-bits are
// undefined.
for (int i = 0; i != Idx; ++i)
ShuffleMask.push_back(i);
for (int i = 0; i != Len; ++i)
- ShuffleMask.push_back(i + 16);
- for (int i = Idx + Len; i != 8; ++i)
+ ShuffleMask.push_back(i + NumElts);
+ for (int i = Idx + Len; i != (int)HalfElts; ++i)
ShuffleMask.push_back(i);
- for (int i = 8; i != 16; ++i)
+ for (int i = HalfElts; i != (int)NumElts; ++i)
ShuffleMask.push_back(SM_SentinelUndef);
}
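
A worked example of the generalized decoder, with immediates chosen for
illustration: extracting 16 bits starting at bit 8 from a v16i8 source:

    SmallVector<int, 16> Mask;
    DecodeEXTRQIMask(MVT::v16i8, /*Len=*/16, /*Idx=*/8, Mask);
    // EltSize = 8, so Len/8 = 2 elements are taken starting at element
    // Idx/8 = 1, zeros fill up to the 64-bit boundary, undef above it:
    //   {1, 2, Z, Z, Z, Z, Z, Z, U, U, U, U, U, U, U, U}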
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 17619d09d0594..251c9f7558ec7 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -134,12 +134,12 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
-/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask.
-void DecodeEXTRQIMask(int Len, int Idx,
+/// Decode a SSE4A EXTRQ instruction as a shuffle mask.
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
-/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
-void DecodeINSERTQIMask(int Len, int Idx,
+/// Decode a SSE4A INSERTQ instruction as a shuffle mask.
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 7437ebacfac3a..4ca57fe9fb00f 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -451,6 +451,7 @@ class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [
FeatureLAHFSAHF,
FeatureMPX,
FeatureSHA,
+ FeatureRDRAND,
FeatureRDSEED,
FeatureXSAVE,
FeatureXSAVEOPT,
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp
index 161bfa7b54748..99aeec67c3266 100644
--- a/lib/Target/X86/X86CallLowering.cpp
+++ b/lib/Target/X86/X86CallLowering.cpp
@@ -19,6 +19,7 @@
#include "X86InstrInfo.h"
#include "X86TargetMachine.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
@@ -35,7 +36,7 @@ using namespace llvm;
X86CallLowering::X86CallLowering(const X86TargetLowering &TLI)
: CallLowering(&TLI) {}
-void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL,
MachineRegisterInfo &MRI,
@@ -43,14 +44,24 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
LLVMContext &Context = OrigArg.Ty->getContext();
- EVT VT = TLI.getValueType(DL, OrigArg.Ty);
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+
+ if (SplitVTs.size() != 1) {
+ // TODO: support struct/array split
+ return false;
+ }
+
+ EVT VT = SplitVTs[0];
unsigned NumParts = TLI.getNumRegisters(Context, VT);
if (NumParts == 1) {
// replace the original type ( pointer -> GPR ).
SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context),
OrigArg.Flags, OrigArg.IsFixed);
- return;
+ return true;
}
SmallVector<unsigned, 8> SplitRegs;
@@ -67,6 +78,7 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
}
PerformArgSplit(SplitRegs);
+ return true;
}
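
A hedged sketch of what ComputeValueVTs reports for the cases this function
now accepts or rejects (the i64 and struct examples are assumptions):

    SmallVector<EVT, 4> VTs;
    SmallVector<uint64_t, 4> Offsets;
    // TLI, DL and Context as in splitToValueTypes above.
    ComputeValueVTs(TLI, DL, Type::getInt64Ty(Context), VTs, &Offsets, 0);
    // i64 yields a single EVT, so it is handled; a struct such as {i32, i32}
    // yields two entries, and splitToValueTypes now returns false for it.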
namespace {
@@ -113,9 +125,11 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
SmallVector<ArgInfo, 8> SplitArgs;
- splitToValueTypes(
- OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs) { MIRBuilder.buildUnmerge(Regs, VReg); });
+ if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs) {
+ MIRBuilder.buildUnmerge(Regs, VReg);
+ }))
+ return false;
FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
@@ -181,12 +195,23 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> SplitArgs;
unsigned Idx = 0;
for (auto &Arg : F.args()) {
+
+ // TODO: handle the non-simple cases.
+ if (Arg.hasAttribute(Attribute::ByVal) ||
+ Arg.hasAttribute(Attribute::InReg) ||
+ Arg.hasAttribute(Attribute::StructRet) ||
+ Arg.hasAttribute(Attribute::SwiftSelf) ||
+ Arg.hasAttribute(Attribute::SwiftError) ||
+ Arg.hasAttribute(Attribute::Nest))
+ return false;
+
ArgInfo OrigArg(VRegs[Idx], Arg.getType());
- setArgFlags(OrigArg, Idx + 1, DL, F);
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs) {
- MIRBuilder.buildMerge(VRegs[Idx], Regs);
- });
+ setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
+ if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs) {
+ MIRBuilder.buildMerge(VRegs[Idx], Regs);
+ }))
+ return false;
Idx++;
}
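The key change above is that splitToValueTypes now consults ComputeValueVTs and reports failure for aggregates, letting GlobalISel fall back to SelectionDAG instead of miscompiling. ComputeValueVTs flattens a first-class type into its leaf scalar types; a toy analogy of that flattening (illustrative only, with a hypothetical Ty in place of llvm::Type):

#include <string>
#include <vector>

// Toy IR type: a leaf scalar name, or an aggregate of member types.
struct Ty {
  std::string Scalar;      // non-empty for leaf types, e.g. "i32"
  std::vector<Ty> Members; // non-empty for struct/array types
};

// Analogue of ComputeValueVTs: recursively collect the leaf scalar types.
void flattenValueTypes(const Ty &T, std::vector<std::string> &Out) {
  if (T.Members.empty()) {
    Out.push_back(T.Scalar);
    return;
  }
  for (const Ty &M : T.Members)
    flattenValueTypes(M, Out);
}

// {i32, {float, i64}} flattens to [i32, float, i64]; the new code returns
// false whenever more than one leaf comes back, triggering the fallback.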
diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h
index 8a8afb5682982..6a5dabf33a0a0 100644
--- a/lib/Target/X86/X86CallLowering.h
+++ b/lib/Target/X86/X86CallLowering.h
@@ -39,7 +39,7 @@ private:
/// A function of this type is used to perform value split action.
typedef std::function<void(ArrayRef<unsigned>)> SplitArgTy;
- void splitToValueTypes(const ArgInfo &OrigArgInfo,
+ bool splitToValueTypes(const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI,
SplitArgTy SplitArg) const;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 7d146d050a5c2..6decb550ad5f8 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -651,7 +651,15 @@ def CC_X86_64_GHC : CallingConv<[
// Pass in STG registers: F1, F2, F3, F4, D1, D2
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
- CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
+ CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>,
+ // AVX
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM1, YMM2, YMM3, YMM4, YMM5, YMM6]>>>,
+ // AVX-512
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfSubtarget<"hasAVX512()",
+ CCAssignToReg<[ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6]>>>
]>;
def CC_X86_64_HiPE : CallingConv<[
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 621505aaded9e..ee9e78146305d 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -3039,6 +3039,9 @@ bool X86FastISel::fastLowerArguments() {
if (!Subtarget->is64Bit())
return false;
+ if (Subtarget->useSoftFloat())
+ return false;
+
// Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
unsigned GPRCnt = 0;
unsigned FPRCnt = 0;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index e3aa227702bea..f294e819090bc 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -972,7 +972,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
- bool UseRedZone = false;
bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
// The default stack probe size is 4096 if the function has no stackprobesize
@@ -1011,7 +1010,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
MFI.setStackSize(StackSize);
- UseRedZone = true;
}
// Insert stack pointer adjustment for later moving of return addr. Only
@@ -1189,7 +1187,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
- assert(!UseRedZone && "The Red Zone is not accounted for in stack probes");
+ assert(!X86FI->getUsesRedZone() &&
+ "The Red Zone is not accounted for in stack probes");
// Check whether EAX is livein for this block.
bool isEAXAlive = isEAXLiveIn(MBB);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b89914f8893e7..65486cf7f529e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4217,6 +4217,8 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
case X86ISD::INSERTPS:
+ case X86ISD::EXTRQI:
+ case X86ISD::INSERTQI:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
@@ -5554,6 +5556,24 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
+ case X86ISD::EXTRQI:
+ if (isa<ConstantSDNode>(N->getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(2))) {
+ int BitLen = N->getConstantOperandVal(1);
+ int BitIdx = N->getConstantOperandVal(2);
+ DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
+ IsUnary = true;
+ }
+ break;
+ case X86ISD::INSERTQI:
+ if (isa<ConstantSDNode>(N->getOperand(2)) &&
+ isa<ConstantSDNode>(N->getOperand(3))) {
+ int BitLen = N->getConstantOperandVal(2);
+ int BitIdx = N->getConstantOperandVal(3);
+ DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+ }
+ break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -9317,11 +9337,11 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
return DAG.getBitcast(VT, V);
}
-/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
-static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SelectionDAG &DAG) {
+// EXTRQ: Extract Len elements from lower half of source, starting at Idx.
+// Remainder of lower half result is zero and upper half is all undef.
+static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask, uint64_t &BitLen,
+ uint64_t &BitIdx, const APInt &Zeroable) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
@@ -9329,120 +9349,133 @@ static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
// Upper half must be undefined.
if (!isUndefInRange(Mask, HalfSize, HalfSize))
- return SDValue();
+ return false;
- // EXTRQ: Extract Len elements from lower half of source, starting at Idx.
- // Remainder of lower half result is zero and upper half is all undef.
- auto LowerAsEXTRQ = [&]() {
- // Determine the extraction length from the part of the
- // lower half that isn't zeroable.
- int Len = HalfSize;
- for (; Len > 0; --Len)
- if (!Zeroable[Len - 1])
- break;
- assert(Len > 0 && "Zeroable shuffle mask");
+ // Determine the extraction length from the part of the
+ // lower half that isn't zeroable.
+ int Len = HalfSize;
+ for (; Len > 0; --Len)
+ if (!Zeroable[Len - 1])
+ break;
+ assert(Len > 0 && "Zeroable shuffle mask");
- // Attempt to match first Len sequential elements from the lower half.
- SDValue Src;
- int Idx = -1;
- for (int i = 0; i != Len; ++i) {
- int M = Mask[i];
- if (M < 0)
- continue;
- SDValue &V = (M < Size ? V1 : V2);
- M = M % Size;
+ // Attempt to match first Len sequential elements from the lower half.
+ SDValue Src;
+ int Idx = -1;
+ for (int i = 0; i != Len; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef)
+ continue;
+ SDValue &V = (M < Size ? V1 : V2);
+ M = M % Size;
- // The extracted elements must start at a valid index and all mask
- // elements must be in the lower half.
- if (i > M || M >= HalfSize)
- return SDValue();
+ // The extracted elements must start at a valid index and all mask
+ // elements must be in the lower half.
+ if (i > M || M >= HalfSize)
+ return false;
- if (Idx < 0 || (Src == V && Idx == (M - i))) {
- Src = V;
- Idx = M - i;
- continue;
- }
- return SDValue();
+ if (Idx < 0 || (Src == V && Idx == (M - i))) {
+ Src = V;
+ Idx = M - i;
+ continue;
}
+ return false;
+ }
- if (Idx < 0)
- return SDValue();
+ if (!Src || Idx < 0)
+ return false;
- assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
- int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
- int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
- return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src,
- DAG.getConstant(BitLen, DL, MVT::i8),
- DAG.getConstant(BitIdx, DL, MVT::i8));
- };
+ assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
+ BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
+ BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
+ V1 = Src;
+ return true;
+}
+
+// INSERTQ: Extract lowest Len elements from lower half of second source and
+// insert over first source, starting at Idx.
+// { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
+static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask, uint64_t &BitLen,
+ uint64_t &BitIdx) {
+ int Size = Mask.size();
+ int HalfSize = Size / 2;
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ // Upper half must be undefined.
+ if (!isUndefInRange(Mask, HalfSize, HalfSize))
+ return false;
+
+ for (int Idx = 0; Idx != HalfSize; ++Idx) {
+ SDValue Base;
+
+ // Attempt to match first source from mask before insertion point.
+ if (isUndefInRange(Mask, 0, Idx)) {
+ /* EMPTY */
+ } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
+ Base = V1;
+ } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
+ Base = V2;
+ } else {
+ continue;
+ }
- if (SDValue ExtrQ = LowerAsEXTRQ())
- return ExtrQ;
+ // Extend the extraction length looking to match both the insertion of
+ // the second source and the remaining elements of the first.
+ for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
+ SDValue Insert;
+ int Len = Hi - Idx;
- // INSERTQ: Extract lowest Len elements from lower half of second source and
- // insert over first source, starting at Idx.
- // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
- auto LowerAsInsertQ = [&]() {
- for (int Idx = 0; Idx != HalfSize; ++Idx) {
- SDValue Base;
+ // Match insertion.
+ if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
+ Insert = V1;
+ } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
+ Insert = V2;
+ } else {
+ continue;
+ }
- // Attempt to match first source from mask before insertion point.
- if (isUndefInRange(Mask, 0, Idx)) {
+ // Match the remaining elements of the lower half.
+ if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
/* EMPTY */
- } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
+ } else if ((!Base || (Base == V1)) &&
+ isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
Base = V1;
- } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
+ } else if ((!Base || (Base == V2)) &&
+ isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
+ Size + Hi)) {
Base = V2;
} else {
continue;
}
- // Extend the extraction length looking to match both the insertion of
- // the second source and the remaining elements of the first.
- for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
- SDValue Insert;
- int Len = Hi - Idx;
-
- // Match insertion.
- if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
- Insert = V1;
- } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
- Insert = V2;
- } else {
- continue;
- }
-
- // Match the remaining elements of the lower half.
- if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
- /* EMPTY */
- } else if ((!Base || (Base == V1)) &&
- isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
- Base = V1;
- } else if ((!Base || (Base == V2)) &&
- isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
- Size + Hi)) {
- Base = V2;
- } else {
- continue;
- }
-
- // We may not have a base (first source) - this can safely be undefined.
- if (!Base)
- Base = DAG.getUNDEF(VT);
-
- int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
- int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
- return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert,
- DAG.getConstant(BitLen, DL, MVT::i8),
- DAG.getConstant(BitIdx, DL, MVT::i8));
- }
+ BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
+ BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
+ V1 = Base;
+ V2 = Insert;
+ return true;
}
+ }
- return SDValue();
- };
+ return false;
+}
+
+/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
+static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ SelectionDAG &DAG) {
+ uint64_t BitLen, BitIdx;
+ if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
+ return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
- if (SDValue InsertQ = LowerAsInsertQ())
- return InsertQ;
+ if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
+ return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
+ V2 ? V2 : DAG.getUNDEF(VT),
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
return SDValue();
}
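The BitLen/BitIdx values handed to EXTRQI/INSERTQI are bit quantities: element counts scaled by the element width and masked to six bits. A worked example of that arithmetic (a sketch mirroring the math in matchVectorShuffleAsEXTRQ; per the SSE4A encoding a length field of 0 denotes the full 64 bits, which is exactly what the & 0x3f wrap produces for a whole-half extraction):

#include <cstdint>
#include <cstdio>

int main() {
  int ScalarSizeInBits = 16;                         // e.g. a v8i16 shuffle
  int Len = 2, Idx = 1;                              // extract elements [1,2]
  uint64_t BitLen = (Len * ScalarSizeInBits) & 0x3f; // 32
  uint64_t BitIdx = (Idx * ScalarSizeInBits) & 0x3f; // 16
  std::printf("EXTRQI imm: BitLen=%llu BitIdx=%llu\n",
              (unsigned long long)BitLen, (unsigned long long)BitIdx);
  return 0;
}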
@@ -22817,7 +22850,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
auto Builder = IRBuilder<>(AI);
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- auto SynchScope = AI->getSynchScope();
+ auto SSID = AI->getSyncScopeID();
// We must restrict the ordering to avoid generating loads with Release or
// ReleaseAcquire orderings.
auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
@@ -22839,7 +22872,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
// otherwise, we might be able to be more aggressive on relaxed idempotent
// rmw. In practice, they do not look useful, so we don't try to be
// especially clever.
- if (SynchScope == SingleThread)
+ if (SSID == SyncScope::SingleThread)
// FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at
// the IR level, so we must wrap it in an intrinsic.
return nullptr;
@@ -22858,7 +22891,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
// Finally we can emit the atomic load.
LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr,
AI->getType()->getPrimitiveSizeInBits());
- Loaded->setAtomic(Order, SynchScope);
+ Loaded->setAtomic(Order, SSID);
AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
return Loaded;
@@ -22869,13 +22902,13 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
- SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+ SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
- FenceScope == CrossThread) {
+ FenceSSID == SyncScope::System) {
if (Subtarget.hasMFence())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
@@ -23203,6 +23236,20 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SDLoc DL(Op.getNode());
SDValue Op0 = Op.getOperand(0);
+ // TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions.
+ if (Subtarget.hasVPOPCNTDQ()) {
+ if (VT == MVT::v8i16) {
+ Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v8i64, Op0);
+ Op = DAG.getNode(ISD::CTPOP, DL, MVT::v8i64, Op);
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op);
+ }
+ if (VT == MVT::v16i8 || VT == MVT::v16i16) {
+ Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v16i32, Op0);
+ Op = DAG.getNode(ISD::CTPOP, DL, MVT::v16i32, Op);
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op);
+ }
+ }
+
if (!Subtarget.hasSSSE3()) {
// We can't use the fast LUT approach, so fall back on vectorized bitmath.
assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
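The TRUNC(CTPOP(ZEXT(X))) rewrite above is sound because zero-extension adds only zero bits: the population count of the widened lane equals that of the original element, and the result always fits back in the narrow type. A scalar sketch of the same identity (C++20 for std::popcount):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint16_t X = 0xBEEF;
  uint64_t Wide = X;                 // the zext step, to the VPOPCNT lane width
  int Count = std::popcount(Wide);   // the wide vXi64 popcount step
  assert(Count == std::popcount(X)); // truncating back loses nothing
  return 0;
}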
@@ -27101,6 +27148,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
const X86Subtarget &Subtarget,
@@ -27111,38 +27159,67 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
- bool ContainsZeros = false;
- APInt Zeroable(NumMaskElts, false);
- for (unsigned i = 0; i != NumMaskElts; ++i) {
- int M = Mask[i];
- if (isUndefOrZero(M))
- Zeroable.setBit(i);
- ContainsZeros |= (M == SM_SentinelZero);
- }
+ bool ContainsZeros =
+ llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
- // Attempt to match against byte/bit shifts.
- // FIXME: Add 512-bit support.
- if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
- int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
- MaskScalarSizeInBits, Mask,
- 0, Zeroable, Subtarget);
- if (0 < ShiftAmt) {
- PermuteImm = (unsigned)ShiftAmt;
+ // Handle VPERMI/VPERMILPD vXi64/vXf64 patterns.
+ if (!ContainsZeros && MaskScalarSizeInBits == 64) {
+ // Check for lane crossing permutes.
+ if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
+ // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
+ if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) {
+ Shuffle = X86ISD::VPERMI;
+ ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
+ PermuteImm = getV4X86ShuffleImm(Mask);
+ return true;
+ }
+ if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) {
+ SmallVector<int, 4> RepeatedMask;
+ if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
+ Shuffle = X86ISD::VPERMI;
+ ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
+ PermuteImm = getV4X86ShuffleImm(RepeatedMask);
+ return true;
+ }
+ }
+ } else if (AllowFloatDomain && Subtarget.hasAVX()) {
+ // VPERMILPD can permute with a non-repeating shuffle.
+ Shuffle = X86ISD::VPERMILPI;
+ ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
+ PermuteImm = 0;
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef)
+ continue;
+ assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
+ PermuteImm |= (M & 1) << i;
+ }
return true;
}
}
- // Ensure we don't contain any zero elements.
- if (ContainsZeros)
- return false;
-
- assert(llvm::all_of(Mask, [&](int M) {
- return SM_SentinelUndef <= M && M < (int)NumMaskElts;
- }) && "Expected unary shuffle");
+ // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
+ // AVX introduced the VPERMILPD/VPERMILPS float permutes; before then we
+ // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
+ if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
+ !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
+ // Narrow the repeated mask to create 32-bit element permutes.
+ SmallVector<int, 4> WordMask = RepeatedMask;
+ if (MaskScalarSizeInBits == 64)
+ scaleShuffleMask(2, RepeatedMask, WordMask);
+
+ Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
+ ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
+ PermuteImm = getV4X86ShuffleImm(WordMask);
+ return true;
+ }
+ }
- // Handle PSHUFLW/PSHUFHW repeated patterns.
- if (MaskScalarSizeInBits == 16) {
+ // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
+ if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
ArrayRef<int> LoMask(Mask.data() + 0, 4);
@@ -27170,78 +27247,23 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
return true;
}
-
- return false;
}
- return false;
- }
-
- // We only support permutation of 32/64 bit elements after this.
- if (MaskScalarSizeInBits != 32 && MaskScalarSizeInBits != 64)
- return false;
-
- // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
- // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
- if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX())
- return false;
-
- // Pre-AVX2 we must use float shuffles on 256-bit vectors.
- if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) {
- AllowFloatDomain = true;
- AllowIntDomain = false;
}
- // Check for lane crossing permutes.
- if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
- // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
- if (Subtarget.hasAVX2() && MaskVT.is256BitVector() && Mask.size() == 4) {
- Shuffle = X86ISD::VPERMI;
- ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
- PermuteImm = getV4X86ShuffleImm(Mask);
+ // Attempt to match against byte/bit shifts.
+ // FIXME: Add 512-bit support.
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
+ MaskScalarSizeInBits, Mask,
+ 0, Zeroable, Subtarget);
+ if (0 < ShiftAmt) {
+ PermuteImm = (unsigned)ShiftAmt;
return true;
}
- if (Subtarget.hasAVX512() && MaskVT.is512BitVector() && Mask.size() == 8) {
- SmallVector<int, 4> RepeatedMask;
- if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
- Shuffle = X86ISD::VPERMI;
- ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
- PermuteImm = getV4X86ShuffleImm(RepeatedMask);
- return true;
- }
- }
- return false;
}
- // VPERMILPD can permute with a non-repeating shuffle.
- if (AllowFloatDomain && MaskScalarSizeInBits == 64) {
- Shuffle = X86ISD::VPERMILPI;
- ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
- PermuteImm = 0;
- for (int i = 0, e = Mask.size(); i != e; ++i) {
- int M = Mask[i];
- if (M == SM_SentinelUndef)
- continue;
- assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
- PermuteImm |= (M & 1) << i;
- }
- return true;
- }
-
- // We need a repeating shuffle mask for VPERMILPS/PSHUFD.
- SmallVector<int, 4> RepeatedMask;
- if (!is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask))
- return false;
-
- // Narrow the repeated mask for 32-bit element permutes.
- SmallVector<int, 4> WordMask = RepeatedMask;
- if (MaskScalarSizeInBits == 64)
- scaleShuffleMask(2, RepeatedMask, WordMask);
-
- Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
- ShuffleVT = (AllowFloatDomain ? MVT::f32 : MVT::i32);
- ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
- PermuteImm = getV4X86ShuffleImm(WordMask);
- return true;
+ return false;
}
// Attempt to match a combined unary shuffle mask against supported binary
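The PermuteImm values produced above pack four 2-bit source indices into the 8-bit immediate that PSHUFD/VPERMILPS-style instructions consume. A sketch of that packing (my own simplified version; I believe the in-tree getV4X86ShuffleImm additionally maps undef sentinels to source 0):

#include <cassert>
#include <cstdint>

uint8_t packV4ShuffleImm(const int Mask[4]) {
  uint8_t Imm = 0;
  for (int i = 0; i != 4; ++i) {
    assert(Mask[i] >= 0 && Mask[i] < 4 && "expected in-range mask element");
    Imm |= uint8_t(Mask[i]) << (i * 2); // two bits of source index per lane
  }
  return Imm;
}
// {2,3,0,1} packs to 0x4E, the classic "swap the two halves" immediate.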
@@ -27303,6 +27325,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
SDValue &V1, SDValue &V2, SDLoc &DL,
@@ -27388,11 +27411,6 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Attempt to combine to INSERTPS.
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
MaskVT.is128BitVector()) {
- APInt Zeroable(4, 0);
- for (unsigned i = 0; i != NumMaskElts; ++i)
- if (Mask[i] < 0)
- Zeroable.setBit(i);
-
if (Zeroable.getBoolValue() &&
matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
@@ -27578,7 +27596,14 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Which shuffle domains are permitted?
// Permit domain crossing at higher combine depths.
bool AllowFloatDomain = FloatDomain || (Depth > 3);
- bool AllowIntDomain = !FloatDomain || (Depth > 3);
+ bool AllowIntDomain = (!FloatDomain || (Depth > 3)) &&
+ (!MaskVT.is256BitVector() || Subtarget.hasAVX2());
+
+ // Determine zeroable mask elements.
+ APInt Zeroable(NumMaskElts, 0);
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ if (isUndefOrZero(Mask[i]))
+ Zeroable.setBit(i);
if (UnaryShuffle) {
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
@@ -27612,7 +27637,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+ if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, Subtarget, Shuffle,
ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
@@ -27648,7 +27673,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+ if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, V1, V2, DL, DAG,
Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
@@ -27668,6 +27693,45 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
+ // Typically from here on, we need an integer version of MaskVT.
+ MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits);
+ IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts);
+
+ // Annoyingly, SSE4A instructions don't map into the above match helpers.
+ if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
+ uint64_t BitLen, BitIdx;
+ if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
+ Zeroable)) {
+ if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
+ return false; // Nothing to do!
+ V1 = DAG.getBitcast(IntMaskVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
+ if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
+ return false; // Nothing to do!
+ V1 = DAG.getBitcast(IntMaskVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ V2 = DAG.getBitcast(IntMaskVT, V2);
+ DCI.AddToWorklist(V2.getNode());
+ Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
+ }
+
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 2)
@@ -27688,9 +27752,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
(Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
(Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
- MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits);
- MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts);
- SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true);
+ SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPermMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
@@ -27719,9 +27781,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (Mask[i] == SM_SentinelZero)
Mask[i] = NumMaskElts + i;
- MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits);
- MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts);
- SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true);
+ SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPermMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
@@ -27746,9 +27806,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
(Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
(Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
- MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits);
- MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts);
- SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true);
+ SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPermMask.getNode());
V1 = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(V1.getNode());
@@ -27807,8 +27865,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32);
VPermIdx.push_back(Idx);
}
- MVT VPermMaskVT = MVT::getVectorVT(MVT::i32, NumMaskElts);
- SDValue VPermMask = DAG.getBuildVector(VPermMaskVT, DL, VPermIdx);
+ SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx);
DCI.AddToWorklist(VPermMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
@@ -27831,8 +27888,6 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
unsigned NumLanes = MaskVT.getSizeInBits() / 128;
unsigned NumEltsPerLane = NumMaskElts / NumLanes;
SmallVector<int, 8> VPerm2Idx;
- MVT MaskIdxSVT = MVT::getIntegerVT(MaskVT.getScalarSizeInBits());
- MVT MaskIdxVT = MVT::getVectorVT(MaskIdxSVT, NumMaskElts);
unsigned M2ZImm = 0;
for (int M : Mask) {
if (M == SM_SentinelUndef) {
@@ -27852,7 +27907,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(MaskVT, V2);
DCI.AddToWorklist(V2.getNode());
- SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, MaskIdxVT, DAG, DL, true);
+ SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPerm2MaskOp.getNode());
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
DAG.getConstant(M2ZImm, DL, MVT::i8));
@@ -29163,9 +29218,9 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
// v8i16 and v16i16.
// For these two cases, we can shuffle the upper element bytes to a
// consecutive sequence at the start of the vector and treat the results as
- // v16i8 or v32i8, and for v61i8 this is the prefferable solution. However,
+ // v16i8 or v32i8, and for v16i8 this is the preferable solution. However,
// for v16i16 this is not the case, because the shuffle is expensive, so we
- // avoid sign-exteding to this type entirely.
+ // avoid sign-extending to this type entirely.
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
MVT SExtVT;
@@ -29207,7 +29262,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
SExtVT = MVT::v16i8;
// For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)),
// it is not profitable to sign-extend to 256-bit because this will
- // require an extra cross-lane shuffle which is more exprensive than
+ // require an extra cross-lane shuffle which is more expensive than
// truncating the result of the compare to 128-bits.
break;
case MVT::v32i1:
@@ -29580,8 +29635,8 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
// (extends the sign bit which is zero).
// So it is correct to skip the sign/zero extend instruction.
if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
- Root.getOpcode() == ISD::ZERO_EXTEND ||
- Root.getOpcode() == ISD::ANY_EXTEND))
+ Root.getOpcode() == ISD::ZERO_EXTEND ||
+ Root.getOpcode() == ISD::ANY_EXTEND))
Root = Root.getOperand(0);
// If there was a match, we want Root to be a select that is the root of an
@@ -34950,6 +35005,40 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
+ // If X is -1 or 0, then we have an opportunity to avoid constants required in
+ // the general case below.
+ auto *ConstantX = dyn_cast<ConstantSDNode>(X);
+ if (ConstantX) {
+ if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) ||
+ (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) {
+ // This is a complicated way to get -1 or 0 from the carry flag:
+ // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
+ // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ Y.getOperand(1));
+ }
+
+ if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) ||
+ (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) {
+ SDValue EFLAGS = Y->getOperand(1);
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ // Swap the operands of a SUB, and we have the same pattern as above.
+ // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
+ // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB
+ SDValue NewSub = DAG.getNode(
+ X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ NewEFLAGS);
+ }
+ }
+ }
+
if (CC == X86::COND_B) {
// X + SETB Z --> X + (mask SBB Z, Z)
// X - SETB Z --> X - (mask SBB Z, Z)
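The folds above lean on a classic x86 idiom: `sbb r, r` computes r - r - CF = 0 - CF, so it materializes -1 when the carry flag is set and 0 otherwise, with no extra constant. A portable sketch of the arithmetic:

#include <cassert>
#include <cstdint>

// Analogue of SBB with identical source and destination: CF ? -1 : 0.
int64_t sbbSameReg(bool CarryFlag) {
  return int64_t(0) - int64_t(CarryFlag);
}

int main() {
  assert(sbbSameReg(true) == -1);
  assert(sbbSameReg(false) == 0);
  return 0;
}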
@@ -34996,7 +35085,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
- if (auto *ConstantX = dyn_cast<ConstantSDNode>(X)) {
+ if (ConstantX) {
// 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
// fake operands:
// 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
@@ -35549,6 +35638,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
+ case X86ISD::EXTRQI:
+ case X86ISD::INSERTQI:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index e1ade92979dc0..dbbc2bbba6a4a 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -767,6 +767,19 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ // Return true if it is profitable to combine a BUILD_VECTOR to a TRUNCATE
+ // for given operand and result types.
+ // Example of such a combine:
+ // v4i32 build_vector((extract_elt V, 0),
+ // (extract_elt V, 2),
+ // (extract_elt V, 4),
+ // (extract_elt V, 6))
+ // -->
+ // v4i32 truncate (bitcast V to v4i64)
+ bool isDesirableToCombineBuildVectorToTruncate() const override {
+ return true;
+ }
+
/// Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index f3094b781c494..34d4816a25183 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -10488,7 +10488,7 @@ namespace {
return Copy;
}
- // Create a virtal register in *TLSBaseAddrReg, and populate it by
+ // Create a virtual register in *TLSBaseAddrReg, and populate it by
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
MachineFunction *MF = I.getParent()->getParent();
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index e34a90e975b84..859d3288db896 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -32,6 +32,8 @@
#define DEBUG_TYPE "X86-isel"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -56,7 +58,7 @@ private:
/// the patterns that don't require complex C++.
bool selectImpl(MachineInstr &I) const;
- // TODO: remove after suported by Tablegen-erated instruction selection.
+ // TODO: remove after supported by Tablegen-erated instruction selection.
unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc,
uint64_t Alignment) const;
@@ -64,6 +66,8 @@ private:
MachineFunction &MF) const;
bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -75,6 +79,8 @@ private:
bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -262,6 +268,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
return true;
if (selectFrameIndexOrGep(I, MRI, MF))
return true;
+ if (selectGlobalValue(I, MRI, MF))
+ return true;
if (selectConstant(I, MRI, MF))
return true;
if (selectTrunc(I, MRI, MF))
@@ -272,6 +280,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
return true;
if (selectUadde(I, MRI, MF))
return true;
+ if (selectUnmergeValues(I, MRI, MF))
+ return true;
if (selectMergeValues(I, MRI, MF))
return true;
if (selectExtract(I, MRI, MF))
@@ -423,6 +433,15 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
+ if (Ty == LLT::pointer(0, 64))
+ return X86::LEA64r;
+ else if (Ty == LLT::pointer(0, 32))
+ return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
+ else
+ llvm_unreachable("Can't get LEA opcode. Unsupported type.");
+}
+
bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
MachineRegisterInfo &MRI,
MachineFunction &MF) const {
@@ -435,14 +454,7 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
LLT Ty = MRI.getType(DefReg);
// Use LEA to calculate frame index and GEP
- unsigned NewOpc;
- if (Ty == LLT::pointer(0, 64))
- NewOpc = X86::LEA64r;
- else if (Ty == LLT::pointer(0, 32))
- NewOpc = STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
- else
- llvm_unreachable("Can't select G_FRAME_INDEX/G_GEP, unsupported type.");
-
+ unsigned NewOpc = getLeaOP(Ty, STI);
I.setDesc(TII.get(NewOpc));
MachineInstrBuilder MIB(MF, I);
@@ -458,6 +470,54 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ unsigned Opc = I.getOpcode();
+
+ if (Opc != TargetOpcode::G_GLOBAL_VALUE)
+ return false;
+
+ auto GV = I.getOperand(1).getGlobal();
+ if (GV->isThreadLocal()) {
+ return false; // TODO: we don't support TLS yet.
+ }
+
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ X86AddressMode AM;
+ AM.GV = GV;
+ AM.GVOpFlags = STI.classifyGlobalReference(GV);
+
+ // TODO: The ABI requires an extra load; not supported yet.
+ if (isGlobalStubReference(AM.GVOpFlags))
+ return false;
+
+ // TODO: This reference is relative to the PIC base; not supported yet.
+ if (isGlobalRelativeToPICBase(AM.GVOpFlags))
+ return false;
+
+ if (STI.isPICStyleRIPRel()) {
+ // Use rip-relative addressing.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+ unsigned NewOpc = getLeaOP(Ty, STI);
+
+ I.setDesc(TII.get(NewOpc));
+ MachineInstrBuilder MIB(MF, I);
+
+ I.RemoveOperand(1);
+ addFullAddress(MIB, AM);
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
bool X86InstructionSelector::selectConstant(MachineInstr &I,
MachineRegisterInfo &MRI,
MachineFunction &MF) const {
@@ -467,7 +527,8 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I,
const unsigned DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
- assert(Ty.isScalar() && "invalid element type.");
+ if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
+ return false;
uint64_t Val = 0;
if (I.getOperand(1).isCImm()) {
@@ -576,37 +637,40 @@ bool X86InstructionSelector::selectZext(MachineInstr &I,
const LLT DstTy = MRI.getType(DstReg);
const LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy == LLT::scalar(1)) {
-
- unsigned AndOpc;
- if (DstTy == LLT::scalar(32))
- AndOpc = X86::AND32ri8;
- else if (DstTy == LLT::scalar(64))
- AndOpc = X86::AND64ri8;
- else
- return false;
+ if (SrcTy != LLT::scalar(1))
+ return false;
- unsigned DefReg =
- MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
+ unsigned AndOpc;
+ if (DstTy == LLT::scalar(8))
+ AndOpc = X86::AND8ri;
+ else if (DstTy == LLT::scalar(16))
+ AndOpc = X86::AND16ri8;
+ else if (DstTy == LLT::scalar(32))
+ AndOpc = X86::AND32ri8;
+ else if (DstTy == LLT::scalar(64))
+ AndOpc = X86::AND64ri8;
+ else
+ return false;
+ unsigned DefReg = SrcReg;
+ if (DstTy != LLT::scalar(8)) {
+ DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(TargetOpcode::SUBREG_TO_REG), DefReg)
.addImm(0)
.addReg(SrcReg)
.addImm(X86::sub_8bit);
+ }
- MachineInstr &AndInst =
- *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
- .addReg(DefReg)
- .addImm(1);
-
- constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);
+ MachineInstr &AndInst =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
+ .addReg(DefReg)
+ .addImm(1);
- I.eraseFromParent();
- return true;
- }
+ constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);
- return false;
+ I.eraseFromParent();
+ return true;
}
bool X86InstructionSelector::selectCmp(MachineInstr &I,
@@ -918,6 +982,33 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+bool X86InstructionSelector::selectUnmergeValues(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+ return false;
+
+ // Split to extracts.
+ unsigned NumDefs = I.getNumOperands() - 1;
+ unsigned SrcReg = I.getOperand(NumDefs).getReg();
+ unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
+
+ for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
+
+ MachineInstr &ExtrInst =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg())
+ .addReg(SrcReg)
+ .addImm(Idx * DefSize);
+
+ if (!select(ExtrInst))
+ return false;
+ }
+
+ I.eraseFromParent();
+ return true;
+}
+
bool X86InstructionSelector::selectMergeValues(MachineInstr &I,
MachineRegisterInfo &MRI,
MachineFunction &MF) const {
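selectUnmergeValues above decomposes one G_UNMERGE_VALUES into a series of G_EXTRACTs at increasing bit offsets (Idx * DefSize) and then selects each extract. A standalone sketch of that offset arithmetic (illustrative only; the truncation to uint32_t assumes 32-bit pieces):

#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint32_t> unmergeAsExtracts(uint64_t Src, unsigned NumDefs,
                                        unsigned DefSizeBits) {
  std::vector<uint32_t> Defs;
  for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
    uint64_t Offset = uint64_t(Idx) * DefSizeBits; // the Idx * DefSize step
    Defs.push_back(uint32_t(Src >> Offset));       // the G_EXTRACT analogue
  }
  return Defs;
}

int main() {
  // Unmerging an s64 into two s32 pieces extracts at bit offsets 0 and 32.
  auto Defs = unmergeAsExtracts(0x1122334455667788ULL, 2, 32);
  std::printf("%08x %08x\n", Defs[0], Defs[1]); // 55667788 11223344
  return 0;
}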
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index a5fa3340c3f12..744ba21011af7 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -69,12 +69,14 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
for (auto Ty : {s8, s16, s32, p0})
setAction({MemOp, Ty}, Legal);
+ setAction({MemOp, s1}, WidenScalar);
// And everything's fine in addrspace 0.
setAction({MemOp, 1, p0}, Legal);
}
// Pointer-handling
setAction({G_FRAME_INDEX, p0}, Legal);
+ setAction({G_GLOBAL_VALUE, p0}, Legal);
setAction({G_GEP, p0}, Legal);
setAction({G_GEP, 1, s32}, Legal);
@@ -90,8 +92,10 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar);
// Extensions
- setAction({G_ZEXT, s32}, Legal);
- setAction({G_SEXT, s32}, Legal);
+ for (auto Ty : {s8, s16, s32}) {
+ setAction({G_ZEXT, Ty}, Legal);
+ setAction({G_SEXT, Ty}, Legal);
+ }
for (auto Ty : {s1, s8, s16}) {
setAction({G_ZEXT, 1, Ty}, Legal);
@@ -125,12 +129,14 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
for (auto Ty : {s8, s16, s32, s64, p0})
setAction({MemOp, Ty}, Legal);
+ setAction({MemOp, s1}, WidenScalar);
// And everything's fine in addrspace 0.
setAction({MemOp, 1, p0}, Legal);
}
// Pointer-handling
setAction({G_FRAME_INDEX, p0}, Legal);
+ setAction({G_GLOBAL_VALUE, p0}, Legal);
setAction({G_GEP, p0}, Legal);
setAction({G_GEP, 1, s32}, Legal);
@@ -146,7 +152,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar);
// Extensions
- for (auto Ty : {s32, s64}) {
+ for (auto Ty : {s8, s16, s32, s64}) {
setAction({G_ZEXT, Ty}, Legal);
setAction({G_SEXT, Ty}, Legal);
}
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 33bc8e11a5729..fd2837b79103e 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -1042,7 +1042,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
- assert(Subtarget->is64Bit() && "XRay custom events only suports X86-64");
+ assert(Subtarget->is64Bit() && "XRay custom events only supported on X86-64");
// We want to emit the following pattern, which follows the x86 calling
// convention to prepare for the trampoline call to be patched in.
@@ -1332,6 +1332,32 @@ static std::string getShuffleComment(const MachineInstr *MI,
return Comment;
}
+static void printConstant(const Constant *COp, raw_ostream &CS) {
+ if (isa<UndefValue>(COp)) {
+ CS << "u";
+ } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
+ if (CI->getBitWidth() <= 64) {
+ CS << CI->getZExtValue();
+ } else {
+ // print multi-word constant as (w0,w1)
+ const auto &Val = CI->getValue();
+ CS << "(";
+ for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
+ if (i > 0)
+ CS << ",";
+ CS << Val.getRawData()[i];
+ }
+ CS << ")";
+ }
+ } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
+ SmallString<32> Str;
+ CF->getValueAPF().toString(Str);
+ CS << Str;
+ } else {
+ CS << "?";
+ }
+}
+
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
@@ -1766,59 +1792,73 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// For loads from a constant pool to a vector register, print the constant
// loaded.
CASE_ALL_MOV_RM()
+ case X86::VBROADCASTF128:
+ case X86::VBROADCASTI128:
+ case X86::VBROADCASTF32X4Z256rm:
+ case X86::VBROADCASTF32X4rm:
+ case X86::VBROADCASTF32X8rm:
+ case X86::VBROADCASTF64X2Z128rm:
+ case X86::VBROADCASTF64X2rm:
+ case X86::VBROADCASTF64X4rm:
+ case X86::VBROADCASTI32X4Z256rm:
+ case X86::VBROADCASTI32X4rm:
+ case X86::VBROADCASTI32X8rm:
+ case X86::VBROADCASTI64X2Z128rm:
+ case X86::VBROADCASTI64X2rm:
+ case X86::VBROADCASTI64X4rm:
if (!OutStreamer->isVerboseAsm())
break;
if (MI->getNumOperands() <= 4)
break;
if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ int NumLanes = 1;
+ // Override NumLanes for the broadcast instructions.
+ switch (MI->getOpcode()) {
+ case X86::VBROADCASTF128: NumLanes = 2; break;
+ case X86::VBROADCASTI128: NumLanes = 2; break;
+ case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
+ case X86::VBROADCASTF32X4rm: NumLanes = 4; break;
+ case X86::VBROADCASTF32X8rm: NumLanes = 2; break;
+ case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
+ case X86::VBROADCASTF64X2rm: NumLanes = 4; break;
+ case X86::VBROADCASTF64X4rm: NumLanes = 2; break;
+ case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
+ case X86::VBROADCASTI32X4rm: NumLanes = 4; break;
+ case X86::VBROADCASTI32X8rm: NumLanes = 2; break;
+ case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
+ case X86::VBROADCASTI64X2rm: NumLanes = 4; break;
+ case X86::VBROADCASTI64X4rm: NumLanes = 2; break;
+ }
+
std::string Comment;
raw_string_ostream CS(Comment);
const MachineOperand &DstOp = MI->getOperand(0);
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
CS << "[";
- for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
- if (i != 0)
- CS << ",";
- if (CDS->getElementType()->isIntegerTy())
- CS << CDS->getElementAsInteger(i);
- else if (CDS->getElementType()->isFloatTy())
- CS << CDS->getElementAsFloat(i);
- else if (CDS->getElementType()->isDoubleTy())
- CS << CDS->getElementAsDouble(i);
- else
- CS << "?";
+ for (int l = 0; l != NumLanes; ++l) {
+ for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
+ if (i != 0 || l != 0)
+ CS << ",";
+ if (CDS->getElementType()->isIntegerTy())
+ CS << CDS->getElementAsInteger(i);
+ else if (CDS->getElementType()->isFloatTy())
+ CS << CDS->getElementAsFloat(i);
+ else if (CDS->getElementType()->isDoubleTy())
+ CS << CDS->getElementAsDouble(i);
+ else
+ CS << "?";
+ }
}
CS << "]";
OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
- for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
- if (i != 0)
- CS << ",";
- Constant *COp = CV->getOperand(i);
- if (isa<UndefValue>(COp)) {
- CS << "u";
- } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
- if (CI->getBitWidth() <= 64) {
- CS << CI->getZExtValue();
- } else {
- // print multi-word constant as (w0,w1)
- const auto &Val = CI->getValue();
- CS << "(";
- for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
- if (i > 0)
- CS << ",";
- CS << Val.getRawData()[i];
- }
- CS << ")";
- }
- } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
- SmallString<32> Str;
- CF->getValueAPF().toString(Str);
- CS << Str;
- } else {
- CS << "?";
+ for (int l = 0; l != NumLanes; ++l) {
+ for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
+ if (i != 0 || l != 0)
+ CS << ",";
+ printConstant(CV->getOperand(i), CS);
}
}
CS << ">";
@@ -1826,6 +1866,85 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
break;
+ case X86::VBROADCASTSSrm:
+ case X86::VBROADCASTSSYrm:
+ case X86::VBROADCASTSSZ128m:
+ case X86::VBROADCASTSSZ256m:
+ case X86::VBROADCASTSSZm:
+ case X86::VBROADCASTSDYrm:
+ case X86::VBROADCASTSDZ256m:
+ case X86::VBROADCASTSDZm:
+ case X86::VPBROADCASTBrm:
+ case X86::VPBROADCASTBYrm:
+ case X86::VPBROADCASTBZ128m:
+ case X86::VPBROADCASTBZ256m:
+ case X86::VPBROADCASTBZm:
+ case X86::VPBROADCASTDrm:
+ case X86::VPBROADCASTDYrm:
+ case X86::VPBROADCASTDZ128m:
+ case X86::VPBROADCASTDZ256m:
+ case X86::VPBROADCASTDZm:
+ case X86::VPBROADCASTQrm:
+ case X86::VPBROADCASTQYrm:
+ case X86::VPBROADCASTQZ128m:
+ case X86::VPBROADCASTQZ256m:
+ case X86::VPBROADCASTQZm:
+ case X86::VPBROADCASTWrm:
+ case X86::VPBROADCASTWYrm:
+ case X86::VPBROADCASTWZ128m:
+ case X86::VPBROADCASTWZ256m:
+ case X86::VPBROADCASTWZm:
+ if (!OutStreamer->isVerboseAsm())
+ break;
+ if (MI->getNumOperands() <= 4)
+ break;
+ if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ int NumElts;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VBROADCASTSSrm: NumElts = 4; break;
+ case X86::VBROADCASTSSYrm: NumElts = 8; break;
+ case X86::VBROADCASTSSZ128m: NumElts = 4; break;
+ case X86::VBROADCASTSSZ256m: NumElts = 8; break;
+ case X86::VBROADCASTSSZm: NumElts = 16; break;
+ case X86::VBROADCASTSDYrm: NumElts = 4; break;
+ case X86::VBROADCASTSDZ256m: NumElts = 4; break;
+ case X86::VBROADCASTSDZm: NumElts = 8; break;
+ case X86::VPBROADCASTBrm: NumElts = 16; break;
+ case X86::VPBROADCASTBYrm: NumElts = 32; break;
+ case X86::VPBROADCASTBZ128m: NumElts = 16; break;
+ case X86::VPBROADCASTBZ256m: NumElts = 32; break;
+ case X86::VPBROADCASTBZm: NumElts = 64; break;
+ case X86::VPBROADCASTDrm: NumElts = 4; break;
+ case X86::VPBROADCASTDYrm: NumElts = 8; break;
+ case X86::VPBROADCASTDZ128m: NumElts = 4; break;
+ case X86::VPBROADCASTDZ256m: NumElts = 8; break;
+ case X86::VPBROADCASTDZm: NumElts = 16; break;
+ case X86::VPBROADCASTQrm: NumElts = 2; break;
+ case X86::VPBROADCASTQYrm: NumElts = 4; break;
+ case X86::VPBROADCASTQZ128m: NumElts = 2; break;
+ case X86::VPBROADCASTQZ256m: NumElts = 4; break;
+ case X86::VPBROADCASTQZm: NumElts = 8; break;
+ case X86::VPBROADCASTWrm: NumElts = 8; break;
+ case X86::VPBROADCASTWYrm: NumElts = 16; break;
+ case X86::VPBROADCASTWZ128m: NumElts = 8; break;
+ case X86::VPBROADCASTWZ256m: NumElts = 16; break;
+ case X86::VPBROADCASTWZm: NumElts = 32; break;
+ }
+
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ const MachineOperand &DstOp = MI->getOperand(0);
+ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+ CS << "[";
+ for (int i = 0; i != NumElts; ++i) {
+ if (i != 0)
+ CS << ",";
+ printConstant(C, CS);
+ }
+ CS << "]";
+ OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ }
}
MCInst TmpInst;
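For the plain broadcast loads added above there is a single pool constant, so the comment printer simply repeats it NumElts times. A sketch of the resulting comment format (broadcastComment is a hypothetical helper mirroring the loop above, not the in-tree code):

#include <cstdio>
#include <string>

std::string broadcastComment(const std::string &Reg, const std::string &Elt,
                             int NumElts) {
  std::string CS = Reg + " = [";
  for (int i = 0; i != NumElts; ++i) {
    if (i != 0)
      CS += ",";
    CS += Elt; // the same constant, once per destination element
  }
  return CS + "]";
}

int main() {
  // e.g. a VBROADCASTSSrm of 0.5 (NumElts = 4) prints: xmm0 = [0.5,0.5,0.5,0.5]
  std::printf("%s\n", broadcastComment("xmm0", "0.5", 4).c_str());
  return 0;
}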
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index b8ec5883152c3..6d85ca6cad647 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -24,8 +24,8 @@ def SandyBridgeModel : SchedMachineModel {
// Based on the LSD (loop-stream detector) queue size.
let LoopMicroOpBufferSize = 28;
- // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
- // the scheduler to assign a default model to unrecognized opcodes.
+ // This flag is set to allow the scheduler to assign
+ // a default model to unrecognized opcodes.
let CompleteModel = 0;
}
@@ -48,6 +48,7 @@ def SBPort23 : ProcResource<2>;
def SBPort4 : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
+def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>;
def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>;
def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>;
def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
@@ -115,10 +116,10 @@ def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
// Scalar and vector floating point.
defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;
defm : SBWriteResPair<WriteFMul, SBPort0, 5>;
-defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles.
+defm : SBWriteResPair<WriteFDiv, SBPort0, 24>;
defm : SBWriteResPair<WriteFRcp, SBPort0, 5>;
defm : SBWriteResPair<WriteFRsqrt, SBPort0, 5>;
-defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
+defm : SBWriteResPair<WriteFSqrt, SBPort0, 14>;
defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
@@ -134,11 +135,11 @@ def : WriteRes<WriteFVarBlendLd, [SBPort0, SBPort5, SBPort23]> {
}
// Vector integer operations.
-defm : SBWriteResPair<WriteVecShift, SBPort05, 1>;
-defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
-defm : SBWriteResPair<WriteVecALU, SBPort15, 1>;
+defm : SBWriteResPair<WriteVecShift, SBPort5, 1>;
+defm : SBWriteResPair<WriteVecLogic, SBPort5, 1>;
+defm : SBWriteResPair<WriteVecALU, SBPort1, 3>;
defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>;
-defm : SBWriteResPair<WriteShuffle, SBPort15, 1>;
+defm : SBWriteResPair<WriteShuffle, SBPort5, 1>;
defm : SBWriteResPair<WriteBlend, SBPort15, 1>;
def : WriteRes<WriteVarBlend, [SBPort1, SBPort5]> {
let Latency = 2;
@@ -148,13 +149,15 @@ def : WriteRes<WriteVarBlendLd, [SBPort1, SBPort5, SBPort23]> {
let Latency = 6;
let ResourceCycles = [1, 1, 1];
}
-def : WriteRes<WriteMPSAD, [SBPort0, SBPort1, SBPort5]> {
- let Latency = 6;
- let ResourceCycles = [1, 1, 1];
+def : WriteRes<WriteMPSAD, [SBPort0,SBPort15]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
}
-def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> {
- let Latency = 6;
- let ResourceCycles = [1, 1, 1, 1];
+def : WriteRes<WriteMPSADLd, [SBPort0,SBPort23,SBPort15]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,2];
}
////////////////////////////////////////////////////////////////////////////////
@@ -204,13 +207,15 @@ def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
}
// Packed Compare Implicit Length Strings, Return Index
-def : WriteRes<WritePCmpIStrI, [SBPort015]> {
- let Latency = 3;
+def : WriteRes<WritePCmpIStrI, [SBPort0]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
let ResourceCycles = [3];
}
-def : WriteRes<WritePCmpIStrILd, [SBPort015, SBPort23]> {
- let Latency = 3;
- let ResourceCycles = [3, 1];
+def : WriteRes<WritePCmpIStrILd, [SBPort0,SBPort23]> {
+ let Latency = 17;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Index
@@ -224,22 +229,26 @@ def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
}
// AES Instructions.
-def : WriteRes<WriteAESDecEnc, [SBPort015]> {
- let Latency = 8;
- let ResourceCycles = [2];
+def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
}
-def : WriteRes<WriteAESDecEncLd, [SBPort015, SBPort23]> {
- let Latency = 8;
- let ResourceCycles = [2, 1];
+def : WriteRes<WriteAESDecEncLd, [SBPort5,SBPort23,SBPort015]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
}
-def : WriteRes<WriteAESIMC, [SBPort015]> {
- let Latency = 8;
+def : WriteRes<WriteAESIMC, [SBPort5]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def : WriteRes<WriteAESIMCLd, [SBPort015, SBPort23]> {
- let Latency = 8;
- let ResourceCycles = [2, 1];
+def : WriteRes<WriteAESIMCLd, [SBPort5,SBPort23]> {
+ let Latency = 18;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
}
def : WriteRes<WriteAESKeyGen, [SBPort015]> {
@@ -272,4 +281,2407 @@ def : WriteRes<WriteNop, []>;
defm : SBWriteResPair<WriteFShuffle256, SBPort0, 1>;
defm : SBWriteResPair<WriteShuffle256, SBPort0, 1>;
defm : SBWriteResPair<WriteVarVecShift, SBPort0, 1>;
+
+// Remaining SNB instrs.
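+//
+// Each SBWriteResGroup below fixes one combination of issue ports,
+// latency, micro-op count and resource cycles; the InstRW records that
+// follow bind instructions, matched by opcode-name regex, to that group.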
+
+def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
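+// 1-cycle single-uop port-0 ops: immediate vector shifts plus a few
+// others (VTESTP*, VPMOVMSKB, CVTSS2SD).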
+def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>;
+
+def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup1], (instregex "COMP_FST0r")>;
+def: InstRW<[SBWriteResGroup1], (instregex "COM_FST0r")>;
+def: InstRW<[SBWriteResGroup1], (instregex "UCOM_FPr")>;
+def: InstRW<[SBWriteResGroup1], (instregex "UCOM_Fr")>;
+
+def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup2], (instregex "ANDNPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDNPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>;
+def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOV64toPQIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVAPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVAPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVDDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVDI2PDIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVHLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVLHPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSHDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSLDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVUPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVUPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ORPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "RETQ")>;
+def: InstRW<[SBWriteResGroup2], (instregex "SHUFPDrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "SHUFPSrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ST_FPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ST_Frr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VEXTRACTF128rr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VINSERTF128rr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VINSERTPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrm")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrm")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDYrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSYrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VXORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VXORPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "XORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "XORPSrr")>;
+
+def SBWriteResGroup3 : SchedWriteRes<[SBPort01]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup3], (instregex "LEA64_32r")>;
+
+def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup4], (instregex "BLENDPDrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BLENDPSrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BT32ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BT32rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTC32ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTC32rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTR32ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTR32rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTS32ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTS32rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "CDQ")>;
+def: InstRW<[SBWriteResGroup4], (instregex "CQO")>;
+def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SAR32ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETPr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL32ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL64r1")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL8ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHR32ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHR8ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDYrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSYrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQAYrr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQArr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUYrr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUrr")>;
+
+def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup5], (instregex "KORTESTBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSDrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSWrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PADDQirr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNR64irr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSHUFBrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNBrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNDrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNWrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PABSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PABSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PABSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKSSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKSSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKUSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKUSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PALIGNRrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PAVGBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PAVGWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PBLENDWrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFDri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFHWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFLWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSIGNBrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSIGNDrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSIGNWrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSLLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSRLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VMASKMOVPSYrm")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPABSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPABSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPABSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPALIGNRrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPAVGBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPAVGWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPBLENDWrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFDri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFLWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNBrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNDrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNWrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSLLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSRLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLWDrr")>;
+
+def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
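+// Simple 1-cycle integer ALU and register-move ops that can issue on
+// any of ports 0, 1 and 5.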
+def: InstRW<[SBWriteResGroup6], (instregex "ADD32ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD32rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND32ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND64ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CBW")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMC")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP16ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP32i32")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CWDE")>;
+def: InstRW<[SBWriteResGroup6], (instregex "DEC64r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "DEC8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "INC64r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "INC8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVD64from64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOV32rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOV8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOV8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVDQArr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVDQUrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVPQI2QIrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr16")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr16")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NEG64r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NOT64r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR64ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PANDNrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "STC")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB64ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQI2QIrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR32rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR64ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR8rr")>;
+
+def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> {
+ let Latency = 2;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPDrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPSrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVPDI2DIrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVPQIto64rr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "PMOVMSKBrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDYrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVPDI2DIrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVPQIto64rr")>;
+
+def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0")>;
+def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPSrr0")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROL32ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROR32ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSrr")>;
+
+def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroup10], (instregex "VPBLENDVBrr")>;
+
+def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroup11], (instregex "SCASB")>;
+def: InstRW<[SBWriteResGroup11], (instregex "SCASL")>;
+def: InstRW<[SBWriteResGroup11], (instregex "SCASQ")>;
+def: InstRW<[SBWriteResGroup11], (instregex "SCASW")>;
+
+def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup12], (instregex "COMISDrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "COMISSrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "UCOMISDrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "UCOMISSrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "VCOMISDrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "VCOMISSrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISDrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISSrr")>;
+
+def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup13], (instregex "CVTPS2PDrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "PTESTrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDYrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "VPTESTYrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "VPTESTrr")>;
+
+def SBWriteResGroup14 : SchedWriteRes<[SBPort0,SBPort15]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup14], (instregex "PSLLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSLLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSLLWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRADrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRAWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRLWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRADrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRAWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRLWrr")>;
+
+def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup15], (instregex "FNSTSW16r")>;
+
+def SBWriteResGroup16 : SchedWriteRes<[SBPort1,SBPort05]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup16], (instregex "BSWAP32r")>;
+
+def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup17], (instregex "PINSRBrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "PINSRDrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "PINSRQrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "PINSRWrri")>;
+def: InstRW<[SBWriteResGroup17], (instregex "VPINSRBrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "VPINSRDrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "VPINSRQrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "VPINSRWrri")>;
+
+def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
+
+def SBWriteResGroup19 : SchedWriteRes<[SBPort05,SBPort015]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup19], (instregex "ADC64ri8")>;
+def: InstRW<[SBWriteResGroup19], (instregex "ADC64rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>;
+def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVB32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVG32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVGE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVL32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVO32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVP32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVS32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB64ri8")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SHLD32rri8")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SHRD32rri8")>;
+
+def SBWriteResGroup20 : SchedWriteRes<[SBPort0]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
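+// 3-cycle single-uop port-0 ops, mostly vector multiplies and PSADBW.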
+def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMADDUBSWrr64")>;
+def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULHRSWrr64")>;
+def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULUDQirr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMADDUBSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMADDWDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULDQrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULHRSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULHUWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULHWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULLDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULLWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULUDQrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PSADBWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VMOVMSKPSYrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMADDUBSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMADDWDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULDQrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULHRSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULHWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULLDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULLWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPSADBWrr")>;
+
+def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
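+// 3-cycle single-uop port-1 ops: FP add/sub/compare, converts, rounds.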
+def: InstRW<[SBWriteResGroup21], (instregex "ADDPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "BSF32rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "BSR32rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r32")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r8")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MUL8r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "POPCNT32rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VBROADCASTF128")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDYrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSYrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSrr")>;
+
+def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup22], (instregex "EXTRACTPSrr")>;
+def: InstRW<[SBWriteResGroup22], (instregex "VEXTRACTPSrr")>;
+
+def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRBrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRDrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRQrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRWri")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRBrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRDrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRQrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRWri")>;
+def: InstRW<[SBWriteResGroup23], (instregex "SHL64rCL")>;
+def: InstRW<[SBWriteResGroup23], (instregex "SHL8rCL")>;
+
+def SBWriteResGroup24 : SchedWriteRes<[SBPort15]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDSWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBDrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBSWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHADDDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHADDSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHADDWrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHSUBDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHSUBSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHSUBWrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHADDDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHADDSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHADDWrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBWrr")>;
+
+def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def: InstRW<[SBWriteResGroup25], (instregex "LEAVE64")>;
+def: InstRW<[SBWriteResGroup25], (instregex "XADD32rr")>;
+def: InstRW<[SBWriteResGroup25], (instregex "XADD8rr")>;
+
+def SBWriteResGroup26 : SchedWriteRes<[SBPort05,SBPort015]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup26], (instregex "CMOVA32rr")>;
+def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE32rr")>;
+
+def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup27], (instregex "MUL64r")>;
+
+def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup28], (instregex "CVTDQ2PDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2DQrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2PSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTSD2SSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SD64rr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTTPD2DQrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPD2PIirr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTTPD2PIirr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SD64rr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQrr")>;
+
+def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
+def: InstRW<[SBWriteResGroup29], (instregex "PAUSE")>;
+
+def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup30], (instregex "MULPDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MULPSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MULSDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MULSSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MUL_FPrST0")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MUL_FST0r")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MUL_FrST0")>;
+def: InstRW<[SBWriteResGroup30], (instregex "PCMPGTQrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "PHMINPOSUWrr128")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RCPPSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RCPSSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RSQRTPSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RSQRTSSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPDYrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPSYrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULSDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULSSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VPCMPGTQrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VPHMINPOSUWrr128")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTPSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTSSr")>;
+
+def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
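+// 5-cycle integer loads (and PREFETCH) on the load ports.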
+def: InstRW<[SBWriteResGroup31], (instregex "MOV32rm")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOV8rm")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm16")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm8")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm16")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm8")>;
+def: InstRW<[SBWriteResGroup31], (instregex "PREFETCH")>;
+
+def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort1]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SIrr")>;
+
+def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
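+// Plain stores: one store-address uop on SBPort23 plus one store-data
+// uop on SBPort4.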
+def: InstRW<[SBWriteResGroup33], (instregex "MOV64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOV8mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVAPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVAPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVDQAmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVDQUmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVHPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVHPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVLPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVLPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTDQmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTI_64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPQI2QImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8")>;
+def: InstRW<[SBWriteResGroup33], (instregex "PUSH64r")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VEXTRACTF128mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVPDI2DImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQI2QImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQIto64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVSDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVSSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSmr")>;
+
+def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
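+// Same shape as the WriteMPSAD entry above: one port-0 uop plus two
+// port-1/5 uops, 5-cycle latency.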
+def: InstRW<[SBWriteResGroup34], (instregex "MPSADBWrri")>;
+def: InstRW<[SBWriteResGroup34], (instregex "VMPSADBWrri")>;
+
+def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup35], (instregex "CLI")>;
+def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSrr")>;
+
+def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup36], (instregex "CALL64r")>;
+def: InstRW<[SBWriteResGroup36], (instregex "EXTRACTPSmr")>;
+def: InstRW<[SBWriteResGroup36], (instregex "VEXTRACTPSmr")>;
+
+def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDYrm")>;
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDmr")>;
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSmr")>;
+
+def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup38], (instregex "SETAEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETBm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETGEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETGm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETLEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETLm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNOm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNPm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNSm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETOm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETPm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETSm")>;
+
+def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup39], (instregex "PEXTRBmr")>;
+def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRBmr")>;
+def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRDmr")>;
+def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRWmr")>;
+
+def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSB")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSL")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSQ")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSW")>;
+
+def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup41], (instregex "FNINIT")>;
+
+def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort015]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG32rr")>;
+def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG8rr")>;
+
+def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup43], (instregex "SETAm")>;
+def: InstRW<[SBWriteResGroup43], (instregex "SETBEm")>;
+
+def SBWriteResGroup44 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup44], (instregex "LDMXCSR")>;
+def: InstRW<[SBWriteResGroup44], (instregex "STMXCSR")>;
+def: InstRW<[SBWriteResGroup44], (instregex "VLDMXCSR")>;
+def: InstRW<[SBWriteResGroup44], (instregex "VSTMXCSR")>;
+
+def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup45], (instregex "PEXTRDmr")>;
+def: InstRW<[SBWriteResGroup45], (instregex "PEXTRQmr")>;
+def: InstRW<[SBWriteResGroup45], (instregex "VPEXTRQmr")>;
+def: InstRW<[SBWriteResGroup45], (instregex "PUSHF16")>;
+def: InstRW<[SBWriteResGroup45], (instregex "PUSHF64")>;
+
+def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>;
+
+def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>;
+
+def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
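+// 6-cycle vector/FP loads (plus POP64r) on the load ports.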
+def: InstRW<[SBWriteResGroup48], (instregex "LDDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOV64toPQIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVAPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVAPSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDI2PDIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVNTDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVSHDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVSLDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVSSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVUPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVUPSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "POP64r")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VBROADCASTSSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUYrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOV64toPQIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDI2PDIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVNTDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVQI2PQIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSHDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSLDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPSrm")>;
+
+def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup49], (instregex "JMP64m")>;
+def: InstRW<[SBWriteResGroup49], (instregex "MOV64sm")>;
+
+def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort0]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup50], (instregex "BT64mi8")>;
+
+def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSBrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSDrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSWrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PALIGNR64irm")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSHUFBrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNBrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNDrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNWrm64")>;
+
+def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup52], (instregex "ADD64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "ADD8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "AND64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "AND8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP64mi8")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP64mr")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP8mi")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP8mr")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "LODSL")>;
+def: InstRW<[SBWriteResGroup52], (instregex "LODSQ")>;
+def: InstRW<[SBWriteResGroup52], (instregex "OR64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "OR8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "SUB64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "SUB8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "XOR64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "XOR8rm")>;
+
+def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup53], (instregex "POP64rmm")>;
+def: InstRW<[SBWriteResGroup53], (instregex "PUSH64rmm")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_F32m")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_F64m")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_FP32m")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_FP64m")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_FP80m")>;
+
+def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSSrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPDYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPSYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVDDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQAYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQUYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVSHDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVSLDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPDYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPSYrm")>;
+
+def SBWriteResGroup55 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup55], (instregex "CVTPS2PDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "CVTSS2SDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDYrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VCVTSS2SDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VTESTPDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VTESTPSrm")>;
+
+def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup56], (instregex "ANDNPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ANDNPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ANDPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ANDPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "INSERTPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ORPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "SHUFPDrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "SHUFPSrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDNPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDNPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VINSERTPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VORPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDri")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSri")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPDrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPSrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VXORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VXORPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "XORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "XORPSrm")>;
+
+def SBWriteResGroup57 : SchedWriteRes<[SBPort5,SBPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup57], (instregex "AESDECLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESDECrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESENCLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESENCrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "KANDQrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESDECLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESDECrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESENCrr")>;
+
+def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort0]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup58], (instregex "BLENDPDrmi")>;
+def: InstRW<[SBWriteResGroup58], (instregex "BLENDPSrmi")>;
+def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPDrmi")>;
+def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPSrmi")>;
+def: InstRW<[SBWriteResGroup58], (instregex "VINSERTF128rm")>;
+
+def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PABSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PABSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PABSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PACKSSDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PACKSSWBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PACKUSDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PACKUSWBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDUSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDUSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PALIGNRrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PAVGBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PAVGWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PBLENDWrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PINSRBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PINSRDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PINSRQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PINSRWrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXUBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXUDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXUWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINUBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINUDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINUWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSHUFBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSHUFDmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSHUFHWmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSHUFLWmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSIGNBrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSIGNDrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSIGNWrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHQDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLQDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPABSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPABSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPABSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSWBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSWBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPALIGNRrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPAVGBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPAVGWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPBLENDWrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPINSRBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPINSRDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPINSRQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPINSRWrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINUBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINUDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINUWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFDmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFHWmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFLWmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNBrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNDrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNWrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHQDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLQDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLWDrm")>;
+
+def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup60], (instregex "PANDNrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "PANDrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "PORrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "PXORrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "VPANDNrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "VPANDrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "VPORrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "VPXORrm")>;
+
+def SBWriteResGroup61 : SchedWriteRes<[SBPort0]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSYr")>;
+def: InstRW<[SBWriteResGroup61], (instregex "VRSQRTPSYr")>;
+
+def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup62], (instregex "VERRm")>;
+def: InstRW<[SBWriteResGroup62], (instregex "VERWm")>;
+
+def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup63], (instregex "LODSB")>;
+def: InstRW<[SBWriteResGroup63], (instregex "LODSW")>;
+
+def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup64], (instregex "FARJMP64")>;
+
+def SBWriteResGroup65 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup65], (instregex "ADC64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "ADC8rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVAE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVB64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVG64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVGE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVL64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVLE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNO64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNP64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNS64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVO64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVP64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVS64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "SBB64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "SBB8rm")>;
+
+def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup66], (instregex "FNSTSWm")>;
+
+def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup67], (instregex "SLDT32r")>;
+def: InstRW<[SBWriteResGroup67], (instregex "STR32r")>;
+
+def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup68], (instregex "CALL64m")>;
+def: InstRW<[SBWriteResGroup68], (instregex "FNSTCW16m")>;
+
+def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup69], (instregex "BTC64mi8")>;
+def: InstRW<[SBWriteResGroup69], (instregex "BTR64mi8")>;
+def: InstRW<[SBWriteResGroup69], (instregex "BTS64mi8")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SAR64mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SAR8mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL64m1")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL64mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL8m1")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL8mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHR64mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHR8mi")>;
+
+def SBWriteResGroup70 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup70], (instregex "ADD64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "DEC64m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "DEC8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "INC64m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "INC8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NEG64m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NEG8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NOT64m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NOT8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST64rm")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST8rm")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR8mr")>;
+
+def SBWriteResGroup71 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMADDUBSWrm64")>;
+def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMULHRSWrm64")>;
+def: InstRW<[SBWriteResGroup71], (instregex "VTESTPDYrm")>;
+def: InstRW<[SBWriteResGroup71], (instregex "VTESTPSYrm")>;
+
+def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup72], (instregex "BSF64rm")>;
+def: InstRW<[SBWriteResGroup72], (instregex "BSR64rm")>;
+def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m16")>;
+def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m8")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOM64m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOMP32m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOMP64m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "MUL8m")>;
+
+def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDNPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDPDrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDPSrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VORPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VORPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYri")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYri")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPDYrmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPSYrmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPDrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPSrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VXORPDrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VXORPSrm")>;
+
+def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort0]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi")>;
+def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPSYrmi")>;
+
+def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort0]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPDrm0")>;
+def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPSrm0")>;
+def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPDrm")>;
+def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPSrm")>;
+def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm")>;
+def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPSrm")>;
+
+def SBWriteResGroup76 : SchedWriteRes<[SBPort23,SBPort15]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup76], (instregex "PBLENDVBrr0")>;
+def: InstRW<[SBWriteResGroup76], (instregex "VPBLENDVBrm")>;
+
+def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup77], (instregex "COMISDrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "COMISSrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "UCOMISDrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "UCOMISSrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "VCOMISDrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "VCOMISSrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISDrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISSrm")>;
+
+def SBWriteResGroup78 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup78], (instregex "PTESTrm")>;
+def: InstRW<[SBWriteResGroup78], (instregex "VPTESTrm")>;
+
+def SBWriteResGroup79 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRADrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRAWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLDri")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLQri")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLWri")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRADrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRAWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLWrm")>;
+
+def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDSWrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDWrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBDrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBSWrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBWrm64")>;
+
+def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG64rm")>;
+def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG8rm")>;
+
+def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup82], (instregex "CMOVA64rm")>;
+def: InstRW<[SBWriteResGroup82], (instregex "CMOVBE64rm")>;
+
+def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2,3];
+}
+def: InstRW<[SBWriteResGroup83], (instregex "CMPSB")>;
+def: InstRW<[SBWriteResGroup83], (instregex "CMPSL")>;
+def: InstRW<[SBWriteResGroup83], (instregex "CMPSQ")>;
+def: InstRW<[SBWriteResGroup83], (instregex "CMPSW")>;
+
+def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup84], (instregex "FLDCW16m")>;
+
+def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup85], (instregex "ROL64mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROL8mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROR64mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROR8mi")>;
+
+def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSB")>;
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSL")>;
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSQ")>;
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSW")>;
+def: InstRW<[SBWriteResGroup86], (instregex "XADD64rm")>;
+def: InstRW<[SBWriteResGroup86], (instregex "XADD8rm")>;
+
+def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[SBWriteResGroup87], (instregex "FARCALL64")>;
+
+def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SBWriteResGroup88], (instregex "SHLD64mri8")>;
+def: InstRW<[SBWriteResGroup88], (instregex "SHRD64mri8")>;
+
+def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup89], (instregex "MMX_PMULUDQirm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMADDUBSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMADDWDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULHRSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULHUWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULHWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULLDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULLWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULUDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PSADBWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMADDUBSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMADDWDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULHRSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULHUWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULHWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULLDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULLWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULUDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPSADBWrm")>;
+
+def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup90], (instregex "ADDPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CMPPDrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CMPPSrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CMPSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTDQ2PSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SD64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTTPS2PIirm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "POPCNT64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPPDrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPPSrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTDQ2PSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SD64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBSSrm")>;
+
+def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort0]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPSYrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPSrm")>;
+
+def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup92], (instregex "DPPDrri")>;
+def: InstRW<[SBWriteResGroup92], (instregex "VDPPDrri")>;
+
+def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "MUL64m")>;
+
+def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup94], (instregex "VPTESTYrm")>;
+
+def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup95], (instregex "LD_F32m")>;
+def: InstRW<[SBWriteResGroup95], (instregex "LD_F64m")>;
+def: InstRW<[SBWriteResGroup95], (instregex "LD_F80m")>;
+
+def SBWriteResGroup96 : SchedWriteRes<[SBPort23,SBPort15]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup96], (instregex "PHADDDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHADDSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHADDWrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHSUBDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHSUBSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHSUBWrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHADDDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHADDSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHADDWrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBWrm")>;
+
+def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup97], (instregex "IST_F16m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_F32m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_FP16m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_FP32m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_FP64m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "SHL64mCL")>;
+def: InstRW<[SBWriteResGroup97], (instregex "SHL8mCL")>;
+
+def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,2,3];
+}
+def: InstRW<[SBWriteResGroup98], (instregex "ADC64mi8")>;
+def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>;
+def: InstRW<[SBWriteResGroup98], (instregex "SBB64mi8")>;
+def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>;
+
+def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,2,2,1];
+}
+def: InstRW<[SBWriteResGroup99], (instregex "ADC64mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "ADC8mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "SBB64mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "SBB8mr")>;
+
+def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,1,2,1,1];
+}
+def: InstRW<[SBWriteResGroup100], (instregex "BT64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTC64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTR64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTS64mr")>;
+
+def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ADD_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F16m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUB_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUB_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCMPPDYrmi")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMINPDrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMINPSrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPDm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPSm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VSUBPSYrm")>;
+
+def SBWriteResGroup102 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SIrm")>;
+
+def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup103], (instregex "CVTDQ2PDrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2DQrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2PSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTSD2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SS64rm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTTPD2DQrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPD2PIirm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTTPD2PIirm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2DQrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2PSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTSD2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SS64rm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTTPD2DQrm")>;
+
+def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup104], (instregex "MULPDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "MULPSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "MULSDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "MULSSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "PCMPGTQrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "PHMINPOSUWrm128")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RCPPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RCPSSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RSQRTPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RSQRTSSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULPDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULPSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULSDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULSSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VPCMPGTQrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VPHMINPOSUWrm128")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRCPPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRCPSSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTSSm")>;
+
+def SBWriteResGroup105 : SchedWriteRes<[SBPort0]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRIrr")>;
+def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRM128rr")>;
+def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRIrr")>;
+def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRM128rr")>;
+
+def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup106], (instregex "FICOM16m")>;
+def: InstRW<[SBWriteResGroup106], (instregex "FICOM32m")>;
+def: InstRW<[SBWriteResGroup106], (instregex "FICOMP16m")>;
+def: InstRW<[SBWriteResGroup106], (instregex "FICOMP32m")>;
+
+def SBWriteResGroup107 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2DQYrm")>;
+def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm")>;
+def: InstRW<[SBWriteResGroup107], (instregex "VCVTTPD2DQYrm")>;
+
+def SBWriteResGroup108 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup108], (instregex "MPSADBWrmi")>;
+def: InstRW<[SBWriteResGroup108], (instregex "VMPSADBWrmi")>;
+
+def SBWriteResGroup109 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup109], (instregex "HADDPDrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "HADDPSrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "HSUBPDrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "HSUBPSrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "VHADDPDrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "VHADDPSrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPDrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPSrm")>;
+
+def SBWriteResGroup110 : SchedWriteRes<[SBPort5]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroup110], (instregex "AESIMCrr")>;
+def: InstRW<[SBWriteResGroup110], (instregex "VAESIMCrr")>;
+
+def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup111], (instregex "MUL_F32m")>;
+def: InstRW<[SBWriteResGroup111], (instregex "MUL_F64m")>;
+def: InstRW<[SBWriteResGroup111], (instregex "VMULPDYrm")>;
+def: InstRW<[SBWriteResGroup111], (instregex "VMULPSYrm")>;
+
+def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup112], (instregex "DPPSrri")>;
+def: InstRW<[SBWriteResGroup112], (instregex "VDPPSYrri")>;
+def: InstRW<[SBWriteResGroup112], (instregex "VDPPSrri")>;
+
+def SBWriteResGroup113 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup113], (instregex "VHADDPDrm")>;
+def: InstRW<[SBWriteResGroup113], (instregex "VHADDPSYrm")>;
+def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPSYrm")>;
+
+def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI16m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI32m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI16m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI32m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI16m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI32m")>;
+
+def SBWriteResGroup115 : SchedWriteRes<[SBPort5,SBPort23,SBPort015]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup115], (instregex "AESDECLASTrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "AESDECrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "AESENCLASTrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "AESENCrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "VAESDECLASTrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "VAESDECrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "VAESENCLASTrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "VAESENCrm")>;
+
+def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> {
+ let Latency = 14;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup116], (instregex "DIVPSrr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "DIVSSrr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "SQRTPSr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "SQRTSSr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "VDIVPSrr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "VDIVSSrr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "VSQRTPSr")>;
+
+def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>;
+
+def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3,1];
+}
+def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSYm")>;
+def: InstRW<[SBWriteResGroup118], (instregex "VRSQRTPSYm")>;
+
+def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
+ let Latency = 15;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI16m")>;
+def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI32m")>;
+
+def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> {
+ let Latency = 15;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup120], (instregex "DPPDrmi")>;
+def: InstRW<[SBWriteResGroup120], (instregex "VDPPDrmi")>;
+
+def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 17;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3,1];
+}
+def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRIrm")>;
+def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRM128rm")>;
+def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRIrm")>;
+def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRM128rm")>;
+
+def SBWriteResGroup122 : SchedWriteRes<[SBPort5,SBPort23]> {
+ let Latency = 18;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup122], (instregex "AESIMCrm")>;
+def: InstRW<[SBWriteResGroup122], (instregex "VAESIMCrm")>;
+
+def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 20;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup123], (instregex "DIVPSrm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "DIVSSrm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "SQRTPSm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "SQRTSSm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "VDIVPSrm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "VDIVSSrm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "VSQRTPSm")>;
+
+def SBWriteResGroup124 : SchedWriteRes<[SBPort0]> {
+ let Latency = 21;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup124], (instregex "VSQRTSDr")>;
+
+def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 21;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup125], (instregex "VSQRTSDm")>;
+
+def SBWriteResGroup126 : SchedWriteRes<[SBPort0]> {
+ let Latency = 22;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup126], (instregex "DIVPDrr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "DIVSDrr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "SQRTPDr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "SQRTSDr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "VDIVPDrr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "VDIVSDrr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "VSQRTPDr")>;
+
+def SBWriteResGroup127 : SchedWriteRes<[SBPort0]> {
+ let Latency = 24;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FPrST0")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FST0r")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FrST0")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIV_FPrST0")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIV_FST0r")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIV_FrST0")>;
+
+def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 28;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup128], (instregex "DIVPDrm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "DIVSDrm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "SQRTPDm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "SQRTSDm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "VDIVPDrm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "VDIVSDrm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "VSQRTPDm")>;
+
+def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort0]> {
+ let Latency = 29;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>;
+def: InstRW<[SBWriteResGroup129], (instregex "VSQRTPSYr")>;
+
+def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
+ let Latency = 31;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F32m")>;
+def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F64m")>;
+def: InstRW<[SBWriteResGroup130], (instregex "DIV_F32m")>;
+def: InstRW<[SBWriteResGroup130], (instregex "DIV_F64m")>;
+
+def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
+ let Latency = 34;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI16m")>;
+def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI32m")>;
+def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI16m")>;
+def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI32m")>;
+
+def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
+ let Latency = 36;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>;
+def: InstRW<[SBWriteResGroup132], (instregex "VSQRTPSYm")>;
+
+def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort0]> {
+ let Latency = 45;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>;
+def: InstRW<[SBWriteResGroup133], (instregex "VSQRTPDYr")>;
+
+def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
+ let Latency = 52;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>;
+def: InstRW<[SBWriteResGroup134], (instregex "VSQRTPDYm")>;
+
+def SBWriteResGroup135 : SchedWriteRes<[SBPort0]> {
+ let Latency = 114;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup135], (instregex "VSQRTSSr")>;
+
} // SchedModel
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index 6cb2a3694d92e..ed53893b779ce 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -369,5 +369,82 @@ def : WriteRes<WriteSystem, [JAny]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [JAny]> { let Latency = 100; }
def : WriteRes<WriteFence, [JSAGU]>;
def : WriteRes<WriteNop, []>;
+
+////////////////////////////////////////////////////////////////////////////////
+// AVX instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def WriteFAddY: SchedWriteRes<[JFPU0]> {
+ let Latency = 3;
+ let ResourceCycles = [2];
+}
+def : InstRW<[WriteFAddY], (instregex "VADD(SUB)?P(S|D)Yrr", "VSUBP(S|D)Yrr")>;
+
+def WriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0]> {
+ let Latency = 8;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteFAddYLd, ReadAfterLd], (instregex "VADD(SUB)?P(S|D)Yrm", "VSUBP(S|D)Yrm")>;
+
+def WriteFDivY: SchedWriteRes<[JFPU1]> {
+ let Latency = 38;
+ let ResourceCycles = [38];
+}
+def : InstRW<[WriteFDivY], (instregex "VDIVP(D|S)Yrr")>;
+
+def WriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1]> {
+ let Latency = 43;
+ let ResourceCycles = [1, 38];
+}
+def : InstRW<[WriteFDivYLd, ReadAfterLd], (instregex "VDIVP(S|D)Yrm")>;
+
+def WriteVMULYPD: SchedWriteRes<[JFPU1]> {
+ let Latency = 4;
+ let ResourceCycles = [4];
+}
+def : InstRW<[WriteVMULYPD], (instregex "VMULPDYrr")>;
+
+def WriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1]> {
+ let Latency = 9;
+ let ResourceCycles = [1, 4];
+}
+def : InstRW<[WriteVMULYPDLd, ReadAfterLd], (instregex "VMULPDYrm")>;
+
+def WriteVMULYPS: SchedWriteRes<[JFPU1]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+def : InstRW<[WriteVMULYPS], (instregex "VMULPSYrr", "VRCPPSYr", "VRSQRTPSYr")>;
+
+def WriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>;
+
+def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> {
+ let Latency = 54;
+ let ResourceCycles = [54];
+}
+def : InstRW<[WriteVSQRTYPD], (instregex "VSQRTPDYr")>;
+
+def WriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1]> {
+ let Latency = 59;
+ let ResourceCycles = [1, 54];
+}
+def : InstRW<[WriteVSQRTYPDLd], (instregex "VSQRTPDYm")>;
+
+def WriteVSQRTYPS: SchedWriteRes<[JFPU1]> {
+ let Latency = 42;
+ let ResourceCycles = [42];
+}
+def : InstRW<[WriteVSQRTYPS], (instregex "VSQRTPSYr")>;
+
+def WriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1]> {
+ let Latency = 47;
+ let ResourceCycles = [1, 42];
+}
+def : InstRW<[WriteVSQRTYPSLd], (instregex "VSQRTPSYm")>;
+
} // SchedModel
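
A pattern worth noting in the Jaguar (BtVer2) entries above: each folded-load variant's latency is its register form plus 5 cycles for the JLAGU load (8 = 3 + 5, 43 = 38 + 5, 9 = 4 + 5, 7 = 2 + 5, 59 = 54 + 5, 47 = 42 + 5). A tiny sanity-check sketch of that inferred invariant; the constant name is illustrative and the +5 is deduced from the pairs, not stated in the model:

    // Inferred from the register/memory latency pairs above.
    constexpr int JLAGULoadLatency = 5;
    static_assert(8 == 3 + JLAGULoadLatency, "WriteFAddYLd = WriteFAddY + load");
    static_assert(43 == 38 + JLAGULoadLatency, "WriteFDivYLd = WriteFDivY + load");
    static_assert(59 == 54 + JLAGULoadLatency, "WriteVSQRTYPDLd = WriteVSQRTYPD + load");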
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 5ba8534d32d33..c9924f264939d 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -142,10 +142,15 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::FDIV, MVT::v2f64, 69 }, // divpd
{ ISD::FADD, MVT::v2f64, 2 }, // addpd
{ ISD::FSUB, MVT::v2f64, 2 }, // subpd
- // v2i64/v4i64 mul is custom lowered as a series of long
- // multiplies(3), shifts(3) and adds(2).
- // slm muldq version throughput is 2
- { ISD::MUL, MVT::v2i64, 11 },
+ // v2i64/v4i64 mul is custom lowered as a series of long
+ // multiplies(3), shifts(3) and adds(2).
+ // slm muldq reciprocal throughput is 2 and addq reciprocal throughput is 4,
+ // thus: 3x2 (muldq throughput) + 3x1 (shift throughput) +
+ // 2x4 (addq throughput) = 17
+ { ISD::MUL, MVT::v2i64, 17 },
+ // slm addq/subq throughput is 4
+ { ISD::ADD, MVT::v2i64, 4 },
+ { ISD::SUB, MVT::v2i64, 4 },
};
if (ST->isSLM()) {
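
A quick sanity check of the cost arithmetic in the comment above, as a hedged C++ sketch (the constant names are invented for illustration; the throughput numbers come straight from the comment):

    // SLM reciprocal throughputs quoted in the comment above.
    constexpr int MulDqRThroughput = 2; // 3 long multiplies
    constexpr int ShiftRThroughput = 1; // 3 shifts
    constexpr int AddQRThroughput = 4;  // 2 adds
    static_assert(3 * MulDqRThroughput + 3 * ShiftRThroughput +
                      2 * AddQRThroughput == 17,
                  "matches the { ISD::MUL, MVT::v2i64, 17 } entry");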
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index d8cf8d3f5da21..53223ab443161 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -124,6 +124,10 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
} else if (I->use_empty()) {
      // Dead arguments (which are always marked as promotable)
++NumArgumentsDead;
+
+ // There may be remaining metadata uses of the argument for things like
+ // llvm.dbg.value. Replace them with undef.
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
} else {
// Okay, this is being promoted. This means that the only uses are loads
// or GEPs which are only used by loads
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 6d34ab8b0d960..233a36d2bc543 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -64,6 +64,12 @@ static cl::opt<float> ImportHotMultiplier(
"import-hot-multiplier", cl::init(3.0), cl::Hidden, cl::value_desc("x"),
cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
+static cl::opt<float> ImportCriticalMultiplier(
+ "import-critical-multiplier", cl::init(100.0), cl::Hidden,
+ cl::value_desc("x"),
+ cl::desc(
+ "Multiply the `import-instr-limit` threshold for critical callsites"));
+
// FIXME: This multiplier was not really tuned up.
static cl::opt<float> ImportColdMultiplier(
"import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
@@ -207,6 +213,8 @@ static void computeImportForFunction(
return ImportHotMultiplier;
if (Hotness == CalleeInfo::HotnessType::Cold)
return ImportColdMultiplier;
+ if (Hotness == CalleeInfo::HotnessType::Critical)
+ return ImportCriticalMultiplier;
return 1.0;
};
@@ -537,8 +545,6 @@ void llvm::thinLTOResolveWeakForLinkerModule(
};
auto updateLinkage = [&](GlobalValue &GV) {
- if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
- return;
// See if the global summary analysis computed a new resolved linkage.
const auto &GS = DefinedGlobals.find(GV.getGUID());
if (GS == DefinedGlobals.end())
@@ -546,6 +552,21 @@ void llvm::thinLTOResolveWeakForLinkerModule(
auto NewLinkage = GS->second->linkage();
if (NewLinkage == GV.getLinkage())
return;
+
+ // Switch the linkage to weakany if asked for, e.g. we do this for
+ // linker-redefined symbols (via --wrap or --defsym).
+ // We record that the linkage should be changed here in `addThinLTO`,
+ // as we need access to the resolution vectors for each input file in
+ // order to find which symbols have been redefined.
+ // We may consider reorganizing this code and moving the linkage recording
+ // somewhere else, e.g. in thinLTOResolveWeakForLinkerInIndex.
+ if (NewLinkage == GlobalValue::WeakAnyLinkage) {
+ GV.setLinkage(NewLinkage);
+ return;
+ }
+
+ if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
+ return;
// Check for a non-prevailing def that has interposable linkage
// (e.g. non-odr weak or linkonce). In that case we can't simply
// convert to available_externally, since it would lose the
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index f277a51ae659a..3d57acf06e746 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -837,7 +837,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
// The global is initialized when the store to it occurs.
new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
- SI->getOrdering(), SI->getSynchScope(), SI);
+ SI->getOrdering(), SI->getSyncScopeID(), SI);
SI->eraseFromParent();
continue;
}
@@ -854,7 +854,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Replace the cmp X, 0 with a use of the bool value.
// Sink the load to where the compare was, if atomic rules allow us to.
Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
- LI->getOrdering(), LI->getSynchScope(),
+ LI->getOrdering(), LI->getSyncScopeID(),
LI->isUnordered() ? (Instruction*)ICI : LI);
InitBoolUsed = true;
switch (ICI->getPredicate()) {
@@ -1605,7 +1605,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
- LI->getOrdering(), LI->getSynchScope(), LI);
+ LI->getOrdering(), LI->getSyncScopeID(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
"This is not a form that we understand!");
@@ -1614,12 +1614,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
}
}
new StoreInst(StoreVal, NewGV, false, 0,
- SI->getOrdering(), SI->getSynchScope(), SI);
+ SI->getOrdering(), SI->getSyncScopeID(), SI);
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
- LI->getOrdering(), LI->getSynchScope(), LI);
+ LI->getOrdering(), LI->getSyncScopeID(), LI);
Value *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
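
The mechanical change in this file is the getSynchScope() to getSyncScopeID() rename; the new accessor returns a SyncScope::ID that the LoadInst/StoreInst constructors accept directly. A minimal sketch of the call shape used above (helper name and includes are assumptions; the constructor arguments mirror the patched calls):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Clone a load's volatility, ordering, and sync scope onto a new load of
    // NewPtr, inserted before the original.
    static LoadInst *cloneLoadAt(Value *NewPtr, LoadInst *LI) {
      return new LoadInst(NewPtr, LI->getName() + ".copy", LI->isVolatile(),
                          /*Align=*/0, LI->getOrdering(), LI->getSyncScopeID(),
                          /*InsertBefore=*/LI);
    }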
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index ad89e40661c67..00ddb93df830a 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -989,5 +989,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// And delete the actual function from the module.
M.getFunctionList().erase(DeadF);
}
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ // Even if we change the IR, we update the core CGSCC data structures and so
+ // can preserve the proxy to the function analysis manager.
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ return PA;
}
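
The return-value change above follows the new pass manager idiom: rather than invalidating everything whenever the IR changed, a pass can mark the specific analyses it kept up to date. A sketch of that idiom under assumed includes (not the actual pass, just the shape):

    #include "llvm/Analysis/CGSCCPassManager.h"
    #include "llvm/IR/PassManager.h"
    using namespace llvm;

    static PreservedAnalyses reportPreserved(bool Changed) {
      if (!Changed)
        return PreservedAnalyses::all();
      // The IR changed, but the CGSCC <-> function-analysis proxy was kept
      // up to date, so only that proxy is marked preserved.
      PreservedAnalyses PA;
      PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
      return PA;
    }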
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index b406c22c69d7a..693df5e7ba925 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -855,15 +855,20 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
Name + ".cfi_jt", &M);
FDecl->setVisibility(GlobalValue::HiddenVisibility);
- } else {
- // Definition.
- assert(isDefinition);
+ } else if (isDefinition) {
F->setName(Name + ".cfi");
F->setLinkage(GlobalValue::ExternalLinkage);
F->setVisibility(GlobalValue::HiddenVisibility);
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
Name, &M);
FDecl->setVisibility(Visibility);
+ } else {
+ // Function definition without type metadata, where some other translation
+ // unit contained a declaration with type metadata. This normally happens
+ // during mixed CFI + non-CFI compilation. We do nothing with the function
+ // so that it is treated the same way as a function defined outside of the
+ // LTO unit.
+ return;
}
if (F->isWeakForLinker())
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 5b1b58b89c32e..0b319f6a488b3 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -188,6 +188,13 @@ PassManagerBuilder::~PassManagerBuilder() {
static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+/// Check if GlobalExtensions is constructed and not empty.
+/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger
+/// the construction of the object.
+static bool GlobalExtensionsNotEmpty() {
+ return GlobalExtensions.isConstructed() && !GlobalExtensions->empty();
+}
+
void PassManagerBuilder::addGlobalExtension(
PassManagerBuilder::ExtensionPointTy Ty,
PassManagerBuilder::ExtensionFn Fn) {
@@ -200,9 +207,12 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
legacy::PassManagerBase &PM) const {
- for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
- if ((*GlobalExtensions)[i].first == ETy)
- (*GlobalExtensions)[i].second(*this, PM);
+ if (GlobalExtensionsNotEmpty()) {
+ for (auto &Ext : *GlobalExtensions) {
+ if (Ext.first == ETy)
+ Ext.second(*this, PM);
+ }
+ }
for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
if (Extensions[i].first == ETy)
Extensions[i].second(*this, PM);
@@ -415,7 +425,7 @@ void PassManagerBuilder::populateModulePassManager(
// builds. The function merging pass is
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
- else if (!GlobalExtensions->empty() || !Extensions.empty())
+ else if (GlobalExtensionsNotEmpty() || !Extensions.empty())
MPM.add(createBarrierNoopPass());
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
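
The guard added above matters because ManagedStatic constructs its object lazily on first dereference, so even a plain GlobalExtensions->empty() call would allocate it; isConstructed() can be queried without side effects. A standalone sketch (container type and names are illustrative):

    #include "llvm/Support/ManagedStatic.h"
    #include <vector>
    using namespace llvm;

    static ManagedStatic<std::vector<int>> Registry;

    // Safe emptiness check: never forces construction of the registry.
    static bool registryNotEmpty() {
      return Registry.isConstructed() && !Registry->empty();
    }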
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 8d494fe9cde28..8ef6bb6523093 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -271,7 +271,8 @@ void splitAndWriteThinLTOBitcode(
if (!ArgT || ArgT->getBitWidth() > 64)
return;
}
- if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
+ if (!F->isDeclaration() &&
+ computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
EligibleVirtualFns.insert(F);
});
}
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d5f0dd1914157..809471cfd74f0 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -164,7 +164,7 @@ namespace {
///
class FAddCombine {
public:
- FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {}
+ FAddCombine(InstCombiner::BuilderTy &B) : Builder(B), Instr(nullptr) {}
Value *simplify(Instruction *FAdd);
private:
@@ -187,7 +187,7 @@ namespace {
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst, bool NoNumber = false);
- InstCombiner::BuilderTy *Builder;
+ InstCombiner::BuilderTy &Builder;
Instruction *Instr;
   // Debugging stuff is clustered here.
@@ -735,7 +735,7 @@ Value *FAddCombine::createNaryFAdd
}
Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) {
- Value *V = Builder->CreateFSub(Opnd0, Opnd1);
+ Value *V = Builder.CreateFSub(Opnd0, Opnd1);
if (Instruction *I = dyn_cast<Instruction>(V))
createInstPostProc(I);
return V;
@@ -750,21 +750,21 @@ Value *FAddCombine::createFNeg(Value *V) {
}
Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) {
- Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
+ Value *V = Builder.CreateFAdd(Opnd0, Opnd1);
if (Instruction *I = dyn_cast<Instruction>(V))
createInstPostProc(I);
return V;
}
Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
- Value *V = Builder->CreateFMul(Opnd0, Opnd1);
+ Value *V = Builder.CreateFMul(Opnd0, Opnd1);
if (Instruction *I = dyn_cast<Instruction>(V))
createInstPostProc(I);
return V;
}
Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
- Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ Value *V = Builder.CreateFDiv(Opnd0, Opnd1);
if (Instruction *I = dyn_cast<Instruction>(V))
createInstPostProc(I);
return V;
@@ -895,7 +895,7 @@ bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS,
// ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
// XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even
static Value *checkForNegativeOperand(BinaryOperator &I,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy &Builder) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// This function creates 2 instructions to replace ADD, we need at least one
@@ -919,13 +919,13 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
// X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1))
// ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1))
if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) {
- Value *NewAnd = Builder->CreateAnd(Z, *C1);
- return Builder->CreateSub(RHS, NewAnd, "sub");
+ Value *NewAnd = Builder.CreateAnd(Z, *C1);
+ return Builder.CreateSub(RHS, NewAnd, "sub");
} else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) {
// X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1))
// ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1))
- Value *NewOr = Builder->CreateOr(Z, ~(*C1));
- return Builder->CreateSub(RHS, NewOr, "sub");
+ Value *NewOr = Builder.CreateOr(Z, ~(*C1));
+ return Builder.CreateSub(RHS, NewOr, "sub");
}
}
}
@@ -944,8 +944,8 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
if (C1->countTrailingZeros() == 0)
if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) {
- Value *NewOr = Builder->CreateOr(Z, ~(*C2));
- return Builder->CreateSub(RHS, NewOr, "sub");
+ Value *NewOr = Builder.CreateOr(Z, ~(*C2));
+ return Builder.CreateSub(RHS, NewOr, "sub");
}
return nullptr;
}
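
The sweeping Builder-> to Builder. edits in this file come from InstCombine's builder being passed and stored by reference rather than by pointer; helpers such as the one above now take InstCombiner::BuilderTy &. A minimal standalone sketch of the convention using a plain IRBuilder (helper name is illustrative):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Helpers take the builder by reference and use '.' rather than '->'.
    static Value *createNot(IRBuilder<> &Builder, Value *V) {
      return Builder.CreateNot(V, V->getName() + ".not");
    }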
@@ -1027,7 +1027,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldAddWithConstant(I, *Builder))
+ if (Instruction *X = foldAddWithConstant(I, Builder))
return X;
// FIXME: This should be moved into the above helper function to allow these
@@ -1060,7 +1060,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (ExtendAmt) {
Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
- Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
+ Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext");
return BinaryOperator::CreateAShr(NewShl, ShAmt);
}
@@ -1084,7 +1084,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Instruction *NV = foldOpWithConstantIntoOperand(I))
return NV;
- if (I.getType()->getScalarType()->isIntegerTy(1))
+ if (I.getType()->isIntOrIntVectorTy(1))
return BinaryOperator::CreateXor(LHS, RHS);
// X + X --> X << 1
@@ -1101,7 +1101,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Value *LHSV = dyn_castNegVal(LHS)) {
if (!isa<Constant>(RHS))
if (Value *RHSV = dyn_castNegVal(RHS)) {
- Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+ Value *NewAdd = Builder.CreateAdd(LHSV, RHSV, "sum");
return BinaryOperator::CreateNeg(NewAdd);
}
@@ -1148,7 +1148,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (AddRHSHighBits == AddRHSHighBitsAnd) {
// Okay, the xform is safe. Insert the new add pronto.
- Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+ Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName());
return BinaryOperator::CreateAnd(NewAdd, C2);
}
}
@@ -1191,7 +1191,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd =
- Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
+ Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
return new SExtInst(NewAdd, I.getType());
}
}
@@ -1208,7 +1208,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
willNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), I)) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
return new SExtInst(NewAdd, I.getType());
}
@@ -1227,7 +1227,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd =
- Builder->CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv");
+ Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv");
return new ZExtInst(NewAdd, I.getType());
}
}
@@ -1244,7 +1244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
willNotOverflowUnsignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), I)) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNUWAdd(
+ Value *NewAdd = Builder.CreateNUWAdd(
LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv");
return new ZExtInst(NewAdd, I.getType());
}
@@ -1362,8 +1362,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
- CI, "addconv");
+ Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
@@ -1381,8 +1380,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
- RHSIntVal, "addconv");
+ Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
@@ -1480,14 +1478,14 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
// pointer, subtract it from the offset we have.
if (GEP2) {
Value *Offset = EmitGEPOffset(GEP2);
- Result = Builder->CreateSub(Result, Offset);
+ Result = Builder.CreateSub(Result, Offset);
}
// If we have p - gep(p, ...) then we have to negate the result.
if (Swapped)
- Result = Builder->CreateNeg(Result, "diff.neg");
+ Result = Builder.CreateNeg(Result, "diff.neg");
- return Builder->CreateIntCast(Result, Ty, true);
+ return Builder.CreateIntCast(Result, Ty, true);
}
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
@@ -1522,7 +1520,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return Res;
}
- if (I.getType()->getScalarType()->isIntegerTy(1))
+ if (I.getType()->isIntOrIntVectorTy(1))
return BinaryOperator::CreateXor(Op0, Op1);
// Replace (-1 - A) with (~A).
@@ -1552,12 +1550,12 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Fold (sub 0, (zext bool to B)) --> (sext bool to B)
if (C->isNullValue() && match(Op1, m_ZExt(m_Value(X))))
- if (X->getType()->getScalarType()->isIntegerTy(1))
+ if (X->getType()->isIntOrIntVectorTy(1))
return CastInst::CreateSExtOrBitCast(X, Op1->getType());
// Fold (sub 0, (sext bool to B)) --> (zext bool to B)
if (C->isNullValue() && match(Op1, m_SExt(m_Value(X))))
- if (X->getType()->getScalarType()->isIntegerTy(1))
+ if (X->getType()->isIntOrIntVectorTy(1))
return CastInst::CreateZExtOrBitCast(X, Op1->getType());
}
@@ -1615,7 +1613,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// ((X | Y) - X) --> (~X & Y)
if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1)))))
return BinaryOperator::CreateAnd(
- Y, Builder->CreateNot(Op1, Op1->getName() + ".not"));
+ Y, Builder.CreateNot(Op1, Op1->getName() + ".not"));
}
if (Op1->hasOneUse()) {
@@ -1625,13 +1623,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// (X - (Y - Z)) --> (X + (Z - Y)).
if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
return BinaryOperator::CreateAdd(Op0,
- Builder->CreateSub(Z, Y, Op1->getName()));
+ Builder.CreateSub(Z, Y, Op1->getName()));
// (X - (X & Y)) --> (X & ~Y)
//
if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0))))
return BinaryOperator::CreateAnd(Op0,
- Builder->CreateNot(Y, Y->getName() + ".not"));
+ Builder.CreateNot(Y, Y->getName() + ".not"));
// 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow.
if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) &&
@@ -1648,7 +1646,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// 'nuw' is dropped in favor of the canonical form.
if (match(Op1, m_SExt(m_Value(Y))) &&
Y->getType()->getScalarSizeInBits() == 1) {
- Value *Zext = Builder->CreateZExt(Y, I.getType());
+ Value *Zext = Builder.CreateZExt(Y, I.getType());
BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext);
Add->setHasNoSignedWrap(I.hasNoSignedWrap());
return Add;
@@ -1659,13 +1657,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *A, *B;
Constant *CI;
if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B)))))
- return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
+ return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B));
// X - A*CI -> X + A*-CI
// No need to handle commuted multiply because multiply handling will
   // ensure the constant will be moved to the right hand side.
if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) {
- Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
+ Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI));
return BinaryOperator::CreateAdd(Op0, NewMul);
}
}
@@ -1729,14 +1727,14 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
}
if (FPTruncInst *FPTI = dyn_cast<FPTruncInst>(Op1)) {
if (Value *V = dyn_castFNegVal(FPTI->getOperand(0))) {
- Value *NewTrunc = Builder->CreateFPTrunc(V, I.getType());
+ Value *NewTrunc = Builder.CreateFPTrunc(V, I.getType());
Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc);
NewI->copyFastMathFlags(&I);
return NewI;
}
} else if (FPExtInst *FPEI = dyn_cast<FPExtInst>(Op1)) {
if (Value *V = dyn_castFNegVal(FPEI->getOperand(0))) {
- Value *NewExt = Builder->CreateFPExt(V, I.getType());
+ Value *NewExt = Builder.CreateFPExt(V, I.getType());
Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt);
NewI->copyFastMathFlags(&I);
return NewI;
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index db98be2c98f51..773c86e23707f 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -54,17 +54,17 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC) {
/// instruction. The sign is passed in to determine which kind of predicate to
/// use in the new icmp instruction.
static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy &Builder) {
ICmpInst::Predicate NewPred;
if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred))
return NewConstant;
- return Builder->CreateICmp(NewPred, LHS, RHS);
+ return Builder.CreateICmp(NewPred, LHS, RHS);
}
/// This is the complement of getFCmpCode, which turns an opcode and two
/// operands into either a FCmp instruction, or a true/false constant.
static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy &Builder) {
const auto Pred = static_cast<FCmpInst::Predicate>(Code);
assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE &&
"Unexpected FCmp predicate!");
@@ -72,53 +72,45 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
if (Pred == FCmpInst::FCMP_TRUE)
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
- return Builder->CreateFCmp(Pred, LHS, RHS);
+ return Builder.CreateFCmp(Pred, LHS, RHS);
}
-/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) to BSWAP(BITWISE_OP(A, B))
+/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
+/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B))
/// \param I Binary operator to transform.
/// \return Pointer to node that must replace the original binary operator, or
/// null pointer if no transformation was made.
-Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
- IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
-
- // Can't do vectors.
- if (I.getType()->isVectorTy())
- return nullptr;
-
- // Can only do bitwise ops.
- if (!I.isBitwiseLogicOp())
- return nullptr;
+static Value *SimplifyBSwap(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying");
Value *OldLHS = I.getOperand(0);
Value *OldRHS = I.getOperand(1);
- ConstantInt *ConstLHS = dyn_cast<ConstantInt>(OldLHS);
- ConstantInt *ConstRHS = dyn_cast<ConstantInt>(OldRHS);
- IntrinsicInst *IntrLHS = dyn_cast<IntrinsicInst>(OldLHS);
- IntrinsicInst *IntrRHS = dyn_cast<IntrinsicInst>(OldRHS);
- bool IsBswapLHS = (IntrLHS && IntrLHS->getIntrinsicID() == Intrinsic::bswap);
- bool IsBswapRHS = (IntrRHS && IntrRHS->getIntrinsicID() == Intrinsic::bswap);
-
- if (!IsBswapLHS && !IsBswapRHS)
- return nullptr;
-
- if (!IsBswapLHS && !ConstLHS)
- return nullptr;
- if (!IsBswapRHS && !ConstRHS)
+ Value *NewLHS;
+ if (!match(OldLHS, m_BSwap(m_Value(NewLHS))))
return nullptr;
- /// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
- /// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
- Value *NewLHS = IsBswapLHS ? IntrLHS->getOperand(0) :
- Builder->getInt(ConstLHS->getValue().byteSwap());
+ Value *NewRHS;
+ const APInt *C;
- Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) :
- Builder->getInt(ConstRHS->getValue().byteSwap());
+ if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) {
+ // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+ if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse())
+ return nullptr;
+    // NewRHS is initialized by the matcher above.
+ } else if (match(OldRHS, m_APInt(C))) {
+ // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
+ if (!OldLHS->hasOneUse())
+ return nullptr;
+ NewRHS = ConstantInt::get(I.getType(), C->byteSwap());
+ } else
+ return nullptr;
- Value *BinOp = Builder->CreateBinOp(I.getOpcode(), NewLHS, NewRHS);
- Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy);
- return Builder->CreateCall(F, BinOp);
+ Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS);
+ Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap,
+ I.getType());
+ return Builder.CreateCall(F, BinOp);
}
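
The rewrite above replaces the hand-rolled IntrinsicInst checks with the PatternMatch m_BSwap matcher, which binds the bswap operand on success. A tiny sketch of that idiom (the function name is illustrative):

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // True iff V is a call to llvm.bswap; on success X is bound to its operand.
    static bool isBSwapOf(Value *V, Value *&X) {
      return match(V, m_BSwap(m_Value(X)));
    }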
/// This handles expressions of the form ((val OP C1) & C2). Where
@@ -137,7 +129,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
case Instruction::Xor:
if (Op->hasOneUse()) {
// (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
- Value *And = Builder->CreateAnd(X, AndRHS);
+ Value *And = Builder.CreateAnd(X, AndRHS);
And->takeName(Op);
return BinaryOperator::CreateXor(And, Together);
}
@@ -150,7 +142,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
// NOTE: This reduces the number of bits set in the & mask, which
// can expose opportunities for store narrowing.
Together = ConstantExpr::getXor(AndRHS, Together);
- Value *And = Builder->CreateAnd(X, Together);
+ Value *And = Builder.CreateAnd(X, Together);
And->takeName(Op);
return BinaryOperator::CreateOr(And, OpRHS);
}
@@ -182,7 +174,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
return &TheAnd;
} else {
// Pull the XOR out of the AND.
- Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+ Value *NewAnd = Builder.CreateAnd(X, AndRHS);
NewAnd->takeName(Op);
return BinaryOperator::CreateXor(NewAnd, AndRHS);
}
@@ -198,7 +190,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
- ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask);
+ ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShlMask);
if (CI->getValue() == ShlMask)
// Masking out bits that the shift already masks.
@@ -218,7 +210,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask);
+ ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShrMask);
if (CI->getValue() == ShrMask)
// Masking out bits that the shift already masks.
@@ -238,12 +230,12 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask);
+ Constant *C = Builder.getInt(AndRHS->getValue() & ShrMask);
if (C == AndRHS) { // Masking out bits shifted in.
// (Val ashr C1) & C2 -> (Val lshr C1) & C2
// Make the argument unsigned.
Value *ShVal = Op->getOperand(0);
- ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+ ShVal = Builder.CreateLShr(ShVal, OpRHS, Op->getName());
return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
}
}
@@ -269,15 +261,15 @@ Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) {
Pred = isSigned ? ICmpInst::getSignedPredicate(Pred) : Pred;
- return Builder->CreateICmp(Pred, V, ConstantInt::get(Ty, Hi));
+ return Builder.CreateICmp(Pred, V, ConstantInt::get(Ty, Hi));
}
// V >= Lo && V < Hi --> V - Lo u< Hi - Lo
// V < Lo || V >= Hi --> V - Lo u>= Hi - Lo
Value *VMinusLo =
- Builder->CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off");
+ Builder.CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off");
Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo);
- return Builder->CreateICmp(Pred, VMinusLo, HiMinusLo);
+ return Builder.CreateICmp(Pred, VMinusLo, HiMinusLo);
}
/// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns
@@ -523,7 +515,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y).
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
- llvm::InstCombiner::BuilderTy *Builder) {
+ llvm::InstCombiner::BuilderTy &Builder) {
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
unsigned Mask =
@@ -556,27 +548,27 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
if (Mask & Mask_AllZeros) {
// (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
// -> (icmp eq (A & (B|D)), 0)
- Value *NewOr = Builder->CreateOr(B, D);
- Value *NewAnd = Builder->CreateAnd(A, NewOr);
+ Value *NewOr = Builder.CreateOr(B, D);
+ Value *NewAnd = Builder.CreateAnd(A, NewOr);
// We can't use C as zero because we might actually handle
// (icmp ne (A & B), B) & (icmp ne (A & D), D)
     // with B and D having a single bit set.
Value *Zero = Constant::getNullValue(A->getType());
- return Builder->CreateICmp(NewCC, NewAnd, Zero);
+ return Builder.CreateICmp(NewCC, NewAnd, Zero);
}
if (Mask & BMask_AllOnes) {
// (icmp eq (A & B), B) & (icmp eq (A & D), D)
// -> (icmp eq (A & (B|D)), (B|D))
- Value *NewOr = Builder->CreateOr(B, D);
- Value *NewAnd = Builder->CreateAnd(A, NewOr);
- return Builder->CreateICmp(NewCC, NewAnd, NewOr);
+ Value *NewOr = Builder.CreateOr(B, D);
+ Value *NewAnd = Builder.CreateAnd(A, NewOr);
+ return Builder.CreateICmp(NewCC, NewAnd, NewOr);
}
if (Mask & AMask_AllOnes) {
// (icmp eq (A & B), A) & (icmp eq (A & D), A)
// -> (icmp eq (A & (B&D)), A)
- Value *NewAnd1 = Builder->CreateAnd(B, D);
- Value *NewAnd2 = Builder->CreateAnd(A, NewAnd1);
- return Builder->CreateICmp(NewCC, NewAnd2, A);
+ Value *NewAnd1 = Builder.CreateAnd(B, D);
+ Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1);
+ return Builder.CreateICmp(NewCC, NewAnd2, A);
}
// Remaining cases assume at least that B and D are constant, and depend on
@@ -644,10 +636,10 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
(CCst->getValue() ^ ECst->getValue())).getBoolValue())
return ConstantInt::get(LHS->getType(), !IsAnd);
- Value *NewOr1 = Builder->CreateOr(B, D);
+ Value *NewOr1 = Builder.CreateOr(B, D);
Value *NewOr2 = ConstantExpr::getOr(CCst, ECst);
- Value *NewAnd = Builder->CreateAnd(A, NewOr1);
- return Builder->CreateICmp(NewCC, NewAnd, NewOr2);
+ Value *NewAnd = Builder.CreateAnd(A, NewOr1);
+ return Builder.CreateICmp(NewCC, NewAnd, NewOr2);
}
return nullptr;
@@ -705,13 +697,13 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
if (Inverted)
NewPred = ICmpInst::getInversePredicate(NewPred);
- return Builder->CreateICmp(NewPred, Input, RangeEnd);
+ return Builder.CreateICmp(NewPred, Input, RangeEnd);
}
static Value *
foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
bool JoinedByAnd,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy &Builder) {
Value *X = LHS->getOperand(0);
if (X != RHS->getOperand(0))
return nullptr;
@@ -742,8 +734,8 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
// (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2
// We choose an 'or' with a Pow2 constant rather than the inverse mask with
// 'and' because that may lead to smaller codegen from a smaller constant.
- Value *Or = Builder->CreateOr(X, ConstantInt::get(X->getType(), Xor));
- return Builder->CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
+ Value *Or = Builder.CreateOr(X, ConstantInt::get(X->getType(), Xor));
+ return Builder.CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
}
// Special case: get the ordering right when the values wrap around zero.
@@ -755,9 +747,9 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
// (X == 13 || X == 14) --> X - 13 <=u 1
// (X != 13 && X != 14) --> X - 13 >u 1
// An 'add' is the canonical IR form, so favor that over a 'sub'.
- Value *Add = Builder->CreateAdd(X, ConstantInt::get(X->getType(), -(*C1)));
+ Value *Add = Builder.CreateAdd(X, ConstantInt::get(X->getType(), -(*C1)));
auto NewPred = JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE;
- return Builder->CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1));
+ return Builder.CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1));
}
return nullptr;
@@ -793,10 +785,10 @@ Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
if (A == C &&
isKnownToBeAPowerOfTwo(B, false, 0, &CxtI) &&
isKnownToBeAPowerOfTwo(D, false, 0, &CxtI)) {
- Value *Mask = Builder->CreateOr(B, D);
- Value *Masked = Builder->CreateAnd(A, Mask);
+ Value *Mask = Builder.CreateOr(B, D);
+ Value *Masked = Builder.CreateAnd(A, Mask);
auto NewPred = JoinedByAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
- return Builder->CreateICmp(NewPred, Masked, Mask);
+ return Builder.CreateICmp(NewPred, Masked, Mask);
}
}
@@ -855,8 +847,8 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
if ((PredL == ICmpInst::ICMP_ULT && LHSC->getValue().isPowerOf2()) ||
(PredL == ICmpInst::ICMP_EQ && LHSC->isZero())) {
- Value *NewOr = Builder->CreateOr(LHS0, RHS0);
- return Builder->CreateICmp(PredL, NewOr, LHSC);
+ Value *NewOr = Builder.CreateOr(LHS0, RHS0);
+ return Builder.CreateICmp(PredL, NewOr, LHSC);
}
}
@@ -888,10 +880,10 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
if ((Low & AndC->getValue()).isNullValue() &&
(Low & BigC->getValue()).isNullValue()) {
- Value *NewAnd = Builder->CreateAnd(V, Low | AndC->getValue());
+ Value *NewAnd = Builder.CreateAnd(V, Low | AndC->getValue());
APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue();
Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N);
- return Builder->CreateICmp(PredL, NewAnd, NewVal);
+ return Builder.CreateICmp(PredL, NewAnd, NewVal);
}
}
}
@@ -943,14 +935,14 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_ULT:
if (LHSC == SubOne(RHSC)) // (X != 13 & X u< 14) -> X < 13
- return Builder->CreateICmpULT(LHS0, LHSC);
- if (LHSC->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13
+ return Builder.CreateICmpULT(LHS0, LHSC);
+ if (LHSC->isZero()) // (X != 0 & X u< 14) -> X-1 u< 13
return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
false, true);
break; // (X != 13 & X u< 15) -> no change
case ICmpInst::ICMP_SLT:
if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13
- return Builder->CreateICmpSLT(LHS0, LHSC);
+ return Builder.CreateICmpSLT(LHS0, LHSC);
break; // (X != 13 & X s< 15) -> no change
case ICmpInst::ICMP_NE:
// Potential folds for this case should already be handled.
@@ -963,7 +955,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_NE:
if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14
- return Builder->CreateICmp(PredL, LHS0, RHSC);
+ return Builder.CreateICmp(PredL, LHS0, RHSC);
break; // (X u> 13 & X != 15) -> no change
case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
@@ -976,7 +968,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_NE:
if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14
- return Builder->CreateICmp(PredL, LHS0, RHSC);
+ return Builder.CreateICmp(PredL, LHS0, RHSC);
break; // (X s> 13 & X != 15) -> no change
case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true,
@@ -1025,15 +1017,15 @@ Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
// If either of the constants are nans, then the whole thing returns
// false.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return Builder->getFalse();
- return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ return Builder.getFalse();
+ return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
}
// Handle vector zeros. This occurs because the canonical form of
// "fcmp ord x,x" is "fcmp ord x, 0".
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
isa<ConstantAggregateZero>(RHS->getOperand(1)))
- return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
return nullptr;
}
@@ -1088,7 +1080,7 @@ bool InstCombiner::shouldOptimizeCast(CastInst *CI) {
/// Fold {and,or,xor} (cast X), C.
static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy &Builder) {
Constant *C;
if (!match(Logic.getOperand(1), m_Constant(C)))
return nullptr;
@@ -1107,7 +1099,7 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy);
if (ZextTruncC == C) {
// LogicOpc (zext X), C --> zext (LogicOpc X, C)
- Value *NewOp = Builder->CreateBinOp(LogicOpc, X, TruncC);
+ Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC);
return new ZExtInst(NewOp, DestTy);
}
}
@@ -1150,7 +1142,7 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
// fold logic(cast(A), cast(B)) -> cast(logic(A, B))
if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) {
- Value *NewOp = Builder->CreateBinOp(LogicOpc, Cast0Src, Cast1Src,
+ Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src,
I.getName());
return CastInst::Create(CastOpcode, NewOp, DestTy);
}
@@ -1196,15 +1188,14 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) {
// Fold (and (sext bool to A), B) --> (select bool, B, 0)
Value *X = nullptr;
- if (match(Op0, m_SExt(m_Value(X))) &&
- X->getType()->getScalarType()->isIntegerTy(1)) {
+ if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
Value *Zero = Constant::getNullValue(Op1->getType());
return SelectInst::Create(X, Op1, Zero);
}
// Fold (and ~(sext bool to A), B) --> (select bool, 0, B)
if (match(Op0, m_Not(m_SExt(m_Value(X)))) &&
- X->getType()->getScalarType()->isIntegerTy(1)) {
+ X->getType()->isIntOrIntVectorTy(1)) {
Value *Zero = Constant::getNullValue(Op0->getType());
return SelectInst::Create(X, Zero, Op1);
}
@@ -1283,14 +1274,14 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return &I;
// Do this before using distributive laws to catch simple and/or/not patterns.
- if (Instruction *Xor = foldAndToXor(I, *Builder))
+ if (Instruction *Xor = foldAndToXor(I, Builder))
return Xor;
// (A|B)&(A|C) -> A|(B&C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyBSwap(I))
+ if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
@@ -1310,15 +1301,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
APInt NotAndRHS(~AndRHSMask);
if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) {
// Not masking anything out for the LHS, move to RHS.
- Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
- Op0RHS->getName()+".masked");
+ Value *NewRHS = Builder.CreateAnd(Op0RHS, AndRHS,
+ Op0RHS->getName()+".masked");
return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
}
if (!isa<Constant>(Op0RHS) &&
MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) {
// Not masking anything out for the RHS, move to LHS.
- Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
- Op0LHS->getName()+".masked");
+ Value *NewLHS = Builder.CreateAnd(Op0LHS, AndRHS,
+ Op0LHS->getName()+".masked");
return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
}
@@ -1337,7 +1328,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// (1 >> x) & 1 --> zext(x == 0)
if (AndRHSMask.isOneValue() && Op0LHS == AndRHS) {
Value *NewICmp =
- Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
+ Builder.CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
return new ZExtInst(NewICmp, I.getType());
}
break;
@@ -1360,11 +1351,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
Value *BinOp;
if (isa<ZExtInst>(Op0LHS))
- BinOp = Builder->CreateBinOp(Op0I->getOpcode(), X, TruncC1);
+ BinOp = Builder.CreateBinOp(Op0I->getOpcode(), X, TruncC1);
else
- BinOp = Builder->CreateBinOp(Op0I->getOpcode(), TruncC1, X);
+ BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X);
auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType());
- auto *And = Builder->CreateAnd(BinOp, TruncC2);
+ auto *And = Builder.CreateAnd(BinOp, TruncC2);
return new ZExtInst(And, I.getType());
}
}
@@ -1384,7 +1375,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// into : and (trunc X to T), trunc(YC) & C2
// This will fold the two constants together, which may allow
// other simplifications.
- Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+ Value *NewCast = Builder.CreateTrunc(X, I.getType(), "and.shrunk");
Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
C3 = ConstantExpr::getAnd(C3, AndRHS);
return BinaryOperator::CreateAnd(NewCast, C3);
@@ -1396,7 +1387,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
return FoldedLogic;
- if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder))
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
return DeMorgan;
{
@@ -1422,7 +1413,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// an endless loop. By checking that A is non-constant we ensure that
// we will never get to the loop.
if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
- return BinaryOperator::CreateAnd(A, Builder->CreateNot(B));
+ return BinaryOperator::CreateAnd(A, Builder.CreateNot(B));
}
}
@@ -1436,13 +1427,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
- return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C));
+ return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C));
// ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
- return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C));
+ return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C));
// (A | B) & ((~A) ^ B) -> (A & B)
// (A | B) & (B ^ (~A)) -> (A & B)
@@ -1474,18 +1465,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
if (Value *Res = foldAndOfICmps(LHS, Cmp, I))
- return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
+ return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
if (Value *Res = foldAndOfICmps(LHS, Cmp, I))
- return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
+ return replaceInstUsesWith(I, Builder.CreateAnd(Res, X));
}
if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
if (Value *Res = foldAndOfICmps(Cmp, RHS, I))
- return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
+ return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
if (Value *Res = foldAndOfICmps(Cmp, RHS, I))
- return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
+ return replaceInstUsesWith(I, Builder.CreateAnd(Res, X));
}
}
@@ -1567,14 +1558,14 @@ static Value *getSelectCondition(Value *A, Value *B,
InstCombiner::BuilderTy &Builder) {
// If these are scalars or vectors of i1, A can be used directly.
Type *Ty = A->getType();
- if (match(A, m_Not(m_Specific(B))) && Ty->getScalarType()->isIntegerTy(1))
+ if (match(A, m_Not(m_Specific(B))) && Ty->isIntOrIntVectorTy(1))
return A;
// If A and B are sign-extended, look through the sexts to find the booleans.
Value *Cond;
Value *NotB;
if (match(A, m_SExt(m_Value(Cond))) &&
- Cond->getType()->getScalarType()->isIntegerTy(1) &&
+ Cond->getType()->isIntOrIntVectorTy(1) &&
match(B, m_OneUse(m_Not(m_Value(NotB))))) {
NotB = peekThroughBitcast(NotB, true);
if (match(NotB, m_SExt(m_Specific(Cond))))
@@ -1596,7 +1587,7 @@ static Value *getSelectCondition(Value *A, Value *B,
// operand, see if the constants are inverse bitmasks.
if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) &&
match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) &&
- Cond->getType()->getScalarType()->isIntegerTy(1) &&
+ Cond->getType()->isIntOrIntVectorTy(1) &&
areInverseVectorBitmasks(AC, BC)) {
AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty));
return Builder.CreateXor(Cond, AC);
@@ -1687,9 +1678,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
RangeDiff.ugt(LHSC->getValue())) {
Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC);
- Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskC);
- Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddC);
- return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSC));
+ Value *NewAnd = Builder.CreateAnd(LAddOpnd, MaskC);
+ Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC);
+ return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC);
}
}
}
@@ -1736,9 +1727,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
A = LHS->getOperand(1);
}
if (A && B)
- return Builder->CreateICmp(
+ return Builder.CreateICmp(
ICmpInst::ICMP_UGE,
- Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A);
+ Builder.CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A);
}
// E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
@@ -1759,8 +1750,8 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (LHSC == RHSC && PredL == PredR) {
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
if (PredL == ICmpInst::ICMP_NE && LHSC->isZero()) {
- Value *NewOr = Builder->CreateOr(LHS0, RHS0);
- return Builder->CreateICmp(PredL, NewOr, LHSC);
+ Value *NewOr = Builder.CreateOr(LHS0, RHS0);
+ return Builder.CreateICmp(PredL, NewOr, LHSC);
}
}
@@ -1770,7 +1761,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
ConstantInt *AddC;
if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC))))
if (RHSC->getValue() + AddC->getValue() == LHSC->getValue())
- return Builder->CreateICmpULE(LHS0, LHSC);
+ return Builder.CreateICmpULE(LHS0, LHSC);
}
// From here on, we only handle:
@@ -1886,18 +1877,18 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
// If either of the constants are nans, then the whole thing returns
// true.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return Builder->getTrue();
+ return Builder.getTrue();
// Otherwise, no need to compare the two constants, compare the
// rest.
- return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+ return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
}
// Handle vector zeros. This occurs because the canonical form of
// "fcmp uno x,x" is "fcmp uno x, 0".
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
isa<ConstantAggregateZero>(RHS->getOperand(1)))
- return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+ return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
return nullptr;
}
@@ -1916,7 +1907,7 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
/// when the XOR of the two constants is "all ones" (-1).
static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
Value *A, Value *B, Value *C,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy &Builder) {
ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
if (!CI1) return nullptr;
@@ -1928,7 +1919,7 @@ static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
if (!Xor.isAllOnesValue()) return nullptr;
if (V1 == A || V1 == B) {
- Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
+ Value *NewOp = Builder.CreateAnd((V1 == A) ? B : A, CI1);
return BinaryOperator::CreateOr(NewOp, V1);
}
@@ -1946,7 +1937,7 @@ static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
/// when the XOR of the two constants is "all ones" (-1).
static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op,
Value *A, Value *B, Value *C,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy &Builder) {
ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
if (!CI1)
return nullptr;
@@ -1961,7 +1952,7 @@ static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op,
return nullptr;
if (V1 == A || V1 == B) {
- Value *NewOp = Builder->CreateAnd(V1 == A ? B : A, CI1);
+ Value *NewOp = Builder.CreateAnd(V1 == A ? B : A, CI1);
return BinaryOperator::CreateXor(NewOp, V1);
}
@@ -1987,14 +1978,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return &I;
// Do this before using distributive laws to catch simple and/or/not patterns.
- if (Instruction *Xor = foldOrToXor(I, *Builder))
+ if (Instruction *Xor = foldOrToXor(I, Builder))
return Xor;
// (A&B)|(A&C) -> A&(B|C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyBSwap(I))
+ if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
if (isa<Constant>(Op1))
@@ -2011,7 +2002,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// (X^C)|Y -> (X|Y)^C iff Y&C == 0
if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) &&
MaskedValueIsZero(Op1, *C, 0, &I)) {
- Value *NOr = Builder->CreateOr(A, Op1);
+ Value *NOr = Builder.CreateOr(A, Op1);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr,
ConstantInt::get(NOr->getType(), *C));
@@ -2020,7 +2011,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// Y|(X^C) -> (X|Y)^C iff Y&C == 0
if (match(Op1, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) &&
MaskedValueIsZero(Op0, *C, 0, &I)) {
- Value *NOr = Builder->CreateOr(A, Op0);
+ Value *NOr = Builder.CreateOr(A, Op0);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr,
ConstantInt::get(NOr->getType(), *C));
@@ -2058,7 +2049,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
(V2 == B &&
MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V)
return BinaryOperator::CreateAnd(A,
- Builder->getInt(C1->getValue()|C2->getValue()));
+ Builder.getInt(C1->getValue()|C2->getValue()));
// Or commutes, try both ways.
if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
((V1 == A &&
@@ -2066,7 +2057,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
(V2 == A &&
MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V)
return BinaryOperator::CreateAnd(B,
- Builder->getInt(C1->getValue()|C2->getValue()));
+ Builder.getInt(C1->getValue()|C2->getValue()));
// ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
// iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
@@ -2075,9 +2066,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
(C3->getValue() & ~C1->getValue()).isNullValue() &&
match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
(C4->getValue() & ~C2->getValue()).isNullValue()) {
- V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
+ V2 = Builder.CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
return BinaryOperator::CreateAnd(V2,
- Builder->getInt(C1->getValue()|C2->getValue()));
+ Builder.getInt(C1->getValue()|C2->getValue()));
}
}
}
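
The bitfield fold above, ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2), holds whenever the field masks C1 and C2 are disjoint and each inserted constant lies inside its own mask. A standalone check; the concrete mask values are chosen here purely for illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Disjoint fields: C3 lives inside C1, C4 inside C2, C1 & C2 == 0.
      const uint32_t C1 = 0x00ff, C2 = 0xff00, C3 = 0x000f, C4 = 0x0f00;
      assert((C1 & C2) == 0 && (C3 & ~C1) == 0 && (C4 & ~C2) == 0);
      for (uint32_t V : {0u, 0x1234u, 0xabcdu, 0xffffu}) {
        uint32_t Split  = ((V | C3) & C1) | ((V | C4) & C2);
        uint32_t Merged = (V | C3 | C4) & (C1 | C2);
        assert(Split == Merged); // two bitfield inserts become one
      }
    }
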
@@ -2087,21 +2078,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// 'or' that it is replacing.
if (Op0->hasOneUse() || Op1->hasOneUse()) {
// (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants.
- if (Value *V = matchSelectFromAndOr(A, C, B, D, *Builder))
+ if (Value *V = matchSelectFromAndOr(A, C, B, D, Builder))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(A, C, D, B, *Builder))
+ if (Value *V = matchSelectFromAndOr(A, C, D, B, Builder))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(C, A, B, D, *Builder))
+ if (Value *V = matchSelectFromAndOr(C, A, B, D, Builder))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(C, A, D, B, *Builder))
+ if (Value *V = matchSelectFromAndOr(C, A, D, B, Builder))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(B, D, A, C, *Builder))
+ if (Value *V = matchSelectFromAndOr(B, D, A, C, Builder))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(B, D, C, A, *Builder))
+ if (Value *V = matchSelectFromAndOr(B, D, C, A, Builder))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(D, B, A, C, *Builder))
+ if (Value *V = matchSelectFromAndOr(D, B, A, C, Builder))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(D, B, C, A, *Builder))
+ if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder))
return replaceInstUsesWith(I, V);
}
@@ -2139,9 +2130,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// ((B | C) & A) | B -> B | (A & C)
if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
- return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C));
+ return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C));
- if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder))
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
return DeMorgan;
// Canonicalize xor to the RHS.
@@ -2163,11 +2154,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(A, B);
if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
- Value *Not = Builder->CreateNot(B, B->getName()+".not");
+ Value *Not = Builder.CreateNot(B, B->getName() + ".not");
return BinaryOperator::CreateOr(Not, Op0);
}
if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) {
- Value *Not = Builder->CreateNot(A, A->getName()+".not");
+ Value *Not = Builder.CreateNot(A, A->getName() + ".not");
return BinaryOperator::CreateOr(Not, Op0);
}
}
@@ -2181,7 +2172,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
B->getOpcode() == Instruction::Xor)) {
Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) :
B->getOperand(0);
- Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not");
+ Value *Not = Builder.CreateNot(NotOp, NotOp->getName() + ".not");
return BinaryOperator::CreateOr(Not, Op0);
}
@@ -2194,7 +2185,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// xor was canonicalized to Op1 above.
if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
match(Op0, m_c_And(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateXor(Builder->CreateNot(A), B);
+ return BinaryOperator::CreateXor(Builder.CreateNot(A), B);
if (SwappedForXor)
std::swap(Op0, Op1);
@@ -2212,18 +2203,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
- return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
+ return replaceInstUsesWith(I, Builder.CreateOr(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
- return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
+ return replaceInstUsesWith(I, Builder.CreateOr(Res, X));
}
if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
- return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
+ return replaceInstUsesWith(I, Builder.CreateOr(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
- return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
+ return replaceInstUsesWith(I, Builder.CreateOr(Res, X));
}
}
@@ -2238,10 +2229,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>.
if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
- A->getType()->getScalarType()->isIntegerTy(1))
+ A->getType()->isIntOrIntVectorTy(1))
return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
- A->getType()->getScalarType()->isIntegerTy(1))
+ A->getType()->isIntOrIntVectorTy(1))
return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
// Note: If we've gotten to the point of visiting the outer OR, then the
@@ -2252,7 +2243,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
ConstantInt *C1;
if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
- Value *Inner = Builder->CreateOr(A, Op1);
+ Value *Inner = Builder.CreateOr(A, Op1);
Inner->takeName(Op0);
return BinaryOperator::CreateOr(Inner, C1);
}
@@ -2265,8 +2256,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Op0->hasOneUse() && Op1->hasOneUse() &&
match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
- Value *orTrue = Builder->CreateOr(A, C);
- Value *orFalse = Builder->CreateOr(B, D);
+ Value *orTrue = Builder.CreateOr(A, C);
+ Value *orFalse = Builder.CreateOr(B, D);
return SelectInst::Create(X, orTrue, orFalse);
}
}
@@ -2276,7 +2267,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
/// A ^ B can be specified using other logic ops in a variety of patterns. We
/// can fold these early and efficiently by morphing an existing instruction.
-static Instruction *foldXorToXor(BinaryOperator &I) {
+static Instruction *foldXorToXor(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
assert(I.getOpcode() == Instruction::Xor);
Value *Op0 = I.getOperand(0);
Value *Op1 = I.getOperand(1);
@@ -2323,6 +2315,21 @@ static Instruction *foldXorToXor(BinaryOperator &I) {
return &I;
}
+ // For the remaining cases we need to get rid of one of the operands.
+ if (!Op0->hasOneUse() && !Op1->hasOneUse())
+ return nullptr;
+
+ // (A | B) ^ ~(A & B) -> ~(A ^ B)
+ // (A | B) ^ ~(B & A) -> ~(A ^ B)
+ // (A & B) ^ ~(A | B) -> ~(A ^ B)
+ // (A & B) ^ ~(B | A) -> ~(A ^ B)
+ // Complexity sorting ensures the not will be on the right side.
+ if ((match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) ||
+ (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))))
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
return nullptr;
}
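
The new cases added to foldXorToXor encode the identity (A | B) ^ ~(A & B) == ~(A ^ B), and likewise with the roles of OR and AND swapped. It holds per bit: OR and NAND disagree exactly when the two input bits are equal, which is also when ~(A ^ B) is set. A standalone check, not part of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned A : {0x00u, 0x5au, 0x0fu, 0xffu})
        for (unsigned B : {0x00u, 0xa5u, 0x3cu, 0xffu}) {
          uint8_t NotXor = uint8_t(~(A ^ B));
          assert(uint8_t((A | B) ^ ~(A & B)) == NotXor);
          assert(uint8_t((A & B) ^ ~(A | B)) == NotXor);
        }
    }
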
@@ -2355,12 +2362,12 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) {
// (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS
RHS->setPredicate(RHS->getInversePredicate());
- return Builder->CreateAnd(LHS, RHS);
+ return Builder.CreateAnd(LHS, RHS);
}
if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) {
// !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS
LHS->setPredicate(LHS->getInversePredicate());
- return Builder->CreateAnd(LHS, RHS);
+ return Builder.CreateAnd(LHS, RHS);
}
}
}
@@ -2381,7 +2388,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *NewXor = foldXorToXor(I))
+ if (Instruction *NewXor = foldXorToXor(I, Builder))
return NewXor;
// (A&B)^(A&C) -> A&(B^C) etc
@@ -2393,7 +2400,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (Value *V = SimplifyBSwap(I))
+ if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
// Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
@@ -2404,13 +2411,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// ~(~X & Y) --> (X | ~Y)
// ~(Y & ~X) --> (X | ~Y)
if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) {
- Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not");
+ Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
return BinaryOperator::CreateOr(X, NotY);
}
// ~(~X | Y) --> (X & ~Y)
// ~(Y | ~X) --> (X & ~Y)
if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) {
- Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not");
+ Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
return BinaryOperator::CreateAnd(X, NotY);
}
@@ -2426,8 +2433,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
NotVal->getOperand(0)->hasOneUse()) &&
IsFreeToInvert(NotVal->getOperand(1),
NotVal->getOperand(1)->hasOneUse())) {
- Value *NotX = Builder->CreateNot(NotVal->getOperand(0), "notlhs");
- Value *NotY = Builder->CreateNot(NotVal->getOperand(1), "notrhs");
+ Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs");
+ Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs");
if (NotVal->getOpcode() == Instruction::And)
return BinaryOperator::CreateOr(NotX, NotY);
return BinaryOperator::CreateAnd(NotX, NotY);
@@ -2457,7 +2464,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
// not (cmp A, B) = !cmp A, B
- ICmpInst::Predicate Pred;
+ CmpInst::Predicate Pred;
if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) {
cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
return replaceInstUsesWith(I, Op0);
@@ -2470,8 +2477,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (CI->hasOneUse() && Op0C->hasOneUse()) {
Instruction::CastOps Opcode = Op0C->getOpcode();
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- (RHSC == ConstantExpr::getCast(Opcode, Builder->getTrue(),
- Op0C->getDestTy()))) {
+ (RHSC == ConstantExpr::getCast(Opcode, Builder.getTrue(),
+ Op0C->getDestTy()))) {
CI->setPredicate(CI->getInversePredicate());
return CastInst::Create(Opcode, CI, Op0C->getType());
}
@@ -2481,7 +2488,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ~(c-X) == X-c-1 == X+(-c-1)
- if (Op0I->getOpcode() == Instruction::Sub && RHSC->isAllOnesValue())
+ if (Op0I->getOpcode() == Instruction::Sub && RHSC->isMinusOne())
if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
return BinaryOperator::CreateAdd(Op0I->getOperand(1),
@@ -2491,13 +2498,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
if (Op0I->getOpcode() == Instruction::Add) {
// ~(X-c) --> (-c-1)-X
- if (RHSC->isAllOnesValue()) {
+ if (RHSC->isMinusOne()) {
Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
return BinaryOperator::CreateSub(SubOne(NegOp0CI),
Op0I->getOperand(0));
} else if (RHSC->getValue().isSignMask()) {
// (X + C) ^ signmask -> (X + C + signmask)
- Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue());
+ Constant *C = Builder.getInt(RHSC->getValue() + Op0CI->getValue());
return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
}
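
Both constant folds here are two's-complement algebra: since ~a == -a - 1, one gets ~(c - X) == X + (-c - 1) and ~(X + c) == (-c - 1) - X; and adding the sign mask touches only the top bit (the carry is discarded), so for that one constant add and xor coincide. A standalone check under the usual wraparound rules for unsigned arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t SignMask = 0x80000000u;
      const uint32_t C = 42;
      for (uint32_t X : {0u, 1u, 0x7fffffffu, 0xdeadbeefu}) {
        assert(~(C - X) == X + ~C);   // ~(c-X) == X + (-c-1), since ~C == -C-1
        assert(~(X + C) == ~C - X);   // ~(X+c) == (-c-1) - X
        assert(((X + C) ^ SignMask) == X + C + SignMask);
      }
    }
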
@@ -2530,7 +2537,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
APInt FoldConst = C1->getValue().lshr(C2->getValue());
FoldConst ^= C3->getValue();
// Prepare the two operands.
- Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2);
+ Value *Opnd0 = Builder.CreateLShr(E1->getOperand(0), C2);
Opnd0->takeName(Op0I);
cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc());
Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst);
@@ -2575,14 +2582,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (A == Op1) // (B|A)^B == (A|B)^B
std::swap(A, B);
if (B == Op1) // (A|B)^B == A & ~B
- return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1));
+ return BinaryOperator::CreateAnd(A, Builder.CreateNot(Op1));
} else if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B))))) {
if (A == Op1) // (A&B)^A -> (B&A)^A
std::swap(A, B);
const APInt *C;
if (B == Op1 && // (B&A)^A == ~B & A
!match(Op1, m_APInt(C))) { // Canonical form is (B&C)^C
- return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1);
+ return BinaryOperator::CreateAnd(Builder.CreateNot(A), Op1);
}
}
}
@@ -2594,20 +2601,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
match(Op1, m_Or(m_Value(A), m_Value(B)))) {
if (D == A)
return BinaryOperator::CreateXor(
- Builder->CreateAnd(Builder->CreateNot(A), B), C);
+ Builder.CreateAnd(Builder.CreateNot(A), B), C);
if (D == B)
return BinaryOperator::CreateXor(
- Builder->CreateAnd(Builder->CreateNot(B), A), C);
+ Builder.CreateAnd(Builder.CreateNot(B), A), C);
}
// (A | B)^(A ^ C) -> ((~A) & B) ^ C
if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
match(Op1, m_Xor(m_Value(D), m_Value(C)))) {
if (D == A)
return BinaryOperator::CreateXor(
- Builder->CreateAnd(Builder->CreateNot(A), B), C);
+ Builder.CreateAnd(Builder.CreateNot(A), B), C);
if (D == B)
return BinaryOperator::CreateXor(
- Builder->CreateAnd(Builder->CreateNot(B), A), C);
+ Builder.CreateAnd(Builder.CreateNot(B), A), C);
}
// (A & B) ^ (A ^ B) -> (A | B)
if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
@@ -2624,7 +2631,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Value *A, *B;
if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Not(m_Specific(A))))
- return BinaryOperator::CreateNot(Builder->CreateAnd(A, B));
+ return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 3770021de1002..391c430dab75d 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -128,23 +128,23 @@ Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy(
Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
Src->getType()->getPointerAddressSpace());
- Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType,
- "memcpy_unfold.src_casted");
- Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType,
- "memcpy_unfold.dst_casted");
+ Value *SrcCasted = Builder.CreatePointerCast(Src, ElementPointerType,
+ "memcpy_unfold.src_casted");
+ Value *DstCasted = Builder.CreatePointerCast(Dst, ElementPointerType,
+ "memcpy_unfold.dst_casted");
for (uint64_t i = 0; i < NumElements; ++i) {
// Get current element addresses
ConstantInt *ElementIdxCI =
ConstantInt::get(AMI->getContext(), APInt(64, i));
Value *SrcElementAddr =
- Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
+ Builder.CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
Value *DstElementAddr =
- Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
+ Builder.CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
// Load from the source. Transfer alignment information and mark load as
// unordered atomic.
- LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
+ LoadInst *Load = Builder.CreateLoad(SrcElementAddr, "memcpy_unfold.val");
Load->setOrdering(AtomicOrdering::Unordered);
// We know alignment of the first element. It is also guaranteed by the
// verifier that element size is less or equal than first element
@@ -157,7 +157,7 @@ Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy(
Load->setDebugLoc(AMI->getDebugLoc());
// Store loaded value via unordered atomic store.
- StoreInst *Store = Builder->CreateStore(Load, DstElementAddr);
+ StoreInst *Store = Builder.CreateStore(Load, DstElementAddr);
Store->setOrdering(AtomicOrdering::Unordered);
Store->setAlignment(i == 0 ? AMI->getParamAlignment(0)
: ElementSizeInBytes);
@@ -213,7 +213,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
if (M->getNumOperands() == 3 && M->getOperand(0) &&
mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
- mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() &&
+ mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
M->getOperand(1) &&
mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
@@ -227,9 +227,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
SrcAlign = std::max(SrcAlign, CopyAlign);
DstAlign = std::max(DstAlign, CopyAlign);
- Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
- Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
- LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
+ Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
+ Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
+ LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile());
L->setAlignment(SrcAlign);
if (CopyMD)
L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
@@ -238,7 +238,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
if (LoopMemParallelMD)
L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
- StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
+ StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile());
S->setAlignment(DstAlign);
if (CopyMD)
S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
@@ -274,15 +274,15 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
Value *Dest = MI->getDest();
unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
- Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
+ Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
// Alignment 0 is identity for alignment 1 for memset, but not store.
if (Alignment == 0) Alignment = 1;
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
- StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
- MI->isVolatile());
+ StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
+ MI->isVolatile());
S->setAlignment(Alignment);
// Set the size of the copy to 0, it will be deleted on the next iteration.
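
The fill value above comes from a classic byte-splat: multiplying a byte by 0x0101010101010101 copies it into every byte lane of a 64-bit word, so a memset of identical bytes can become one wide integer store. A standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint64_t Byte : {0x00ull, 0x01ull, 0xabull, 0xffull}) {
        uint64_t Splat = Byte * 0x0101010101010101ull;
        for (int Lane = 0; Lane < 8; ++Lane)
          assert(((Splat >> (8 * Lane)) & 0xff) == Byte);
      }
    }
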
@@ -600,8 +600,7 @@ static Value *simplifyX86muldq(const IntrinsicInst &II,
return Builder.CreateMul(LHS, RHS);
}
-static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC,
- InstCombiner::BuilderTy &Builder, bool IsSigned) {
+static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
Type *ResTy = II.getType();
@@ -676,8 +675,7 @@ static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC,
return ConstantVector::get(Vals);
}
-static Value *simplifyX86movmsk(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
+static Value *simplifyX86movmsk(const IntrinsicInst &II) {
Value *Arg = II.getArgOperand(0);
Type *ResTy = II.getType();
Type *ArgTy = Arg->getType();
@@ -860,7 +858,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
}
// Constant Fold - extraction from zero is always {zero, undef}.
- if (CI0 && CI0->equalsInt(0))
+ if (CI0 && CI0->isZero())
return LowConstantHighUndef(0);
return nullptr;
@@ -1404,7 +1402,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
&IC.getDominatorTree())) {
if (!match(II.getArgOperand(1), m_One())) {
- II.setOperand(1, IC.Builder->getTrue());
+ II.setOperand(1, IC.Builder.getTrue());
return &II;
}
}
@@ -1477,7 +1475,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
// the LLVM intrinsic definition for the pointer argument.
unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
- Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec");
+ Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
// Second, convert the x86 XMM integer vector mask to a vector of bools based
// on each element's most significant bit (the sign bit).
@@ -1485,7 +1483,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
// The pass-through vector for an x86 masked load is a zero vector.
CallInst *NewMaskedLoad =
- IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
+ IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
return IC.replaceInstUsesWith(II, NewMaskedLoad);
}
@@ -1520,13 +1518,13 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
// the LLVM intrinsic definition for the pointer argument.
unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
- Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec");
+ Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
// Second, convert the x86 XMM integer vector mask to a vector of bools based
// on each element's most significant bit (the sign bit).
Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
- IC.Builder->CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
+ IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
// 'Replace uses' doesn't work for stores. Erase the original masked store.
IC.eraseInstFromFunction(II);
@@ -1764,7 +1762,7 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
// The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
// need special handling.
//
- // We seem to be mising intrinsics for rcp.approx.{ftz.}f32, which is just
+ // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
// as well.
case Intrinsic::nvvm_rcp_rn_d:
return {SPC_Reciprocal, FTZ_Any};
@@ -1965,16 +1963,17 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *IIOperand = II->getArgOperand(0);
Value *X = nullptr;
+    // TODO: Should this be in InstSimplify?
// bswap(bswap(x)) -> x
if (match(IIOperand, m_BSwap(m_Value(X))))
- return replaceInstUsesWith(CI, X);
+ return replaceInstUsesWith(CI, X);
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
unsigned C = X->getType()->getPrimitiveSizeInBits() -
IIOperand->getType()->getPrimitiveSizeInBits();
Value *CV = ConstantInt::get(X->getType(), C);
- Value *V = Builder->CreateLShr(X, CV);
+ Value *V = Builder.CreateLShr(X, CV);
return new TruncInst(V, IIOperand->getType());
}
break;
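
The bswap(trunc(bswap(x))) --> trunc(lshr(x, C)) fold works because the inner bswap moves x's high bytes into the low lanes in reversed order and the outer, narrower bswap restores their order; the result is just the high bytes of x, i.e. a right shift by the width difference C. A standalone check, assuming a compiler that provides the GCC/Clang __builtin_bswap intrinsics:

    #include <cassert>
    #include <cstdint>

    int main() {
      // i32 -> i16 case: C = 32 - 16.
      for (uint32_t X : {0x11223344u, 0xdeadbeefu, 0u, 0xffffffffu}) {
        uint16_t Folded = __builtin_bswap16(uint16_t(__builtin_bswap32(X)));
        assert(Folded == uint16_t(X >> 16));
      }
    }
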
@@ -1984,6 +1983,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *IIOperand = II->getArgOperand(0);
Value *X = nullptr;
+    // TODO: Should this be in InstSimplify?
// bitreverse(bitreverse(x)) -> x
if (match(IIOperand, m_BitReverse(m_Value(X))))
return replaceInstUsesWith(CI, X);
@@ -1991,7 +1991,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::masked_load:
- if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder))
+ if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
return replaceInstUsesWith(CI, SimplifiedMaskedOp);
break;
case Intrinsic::masked_store:
@@ -2010,7 +2010,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Power->isOne())
return replaceInstUsesWith(CI, II->getArgOperand(0));
// powi(x, -1) -> 1/x
- if (Power->isAllOnesValue())
+ if (Power->isMinusOne())
return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
II->getArgOperand(0));
}
@@ -2073,11 +2073,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::fmuladd: {
// Canonicalize fast fmuladd to the separate fmul + fadd.
if (II->hasUnsafeAlgebra()) {
- BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->setFastMathFlags(II->getFastMathFlags());
- Value *Mul = Builder->CreateFMul(II->getArgOperand(0),
- II->getArgOperand(1));
- Value *Add = Builder->CreateFAdd(Mul, II->getArgOperand(2));
+ BuilderTy::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(II->getFastMathFlags());
+ Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
+ II->getArgOperand(1));
+ Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
Add->takeName(II);
return replaceInstUsesWith(*II, Add);
}
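
Splitting fmuladd into fmul + fadd is gated on unsafe-algebra because the fused form skips the intermediate rounding step, so the two shapes can produce different results. A standalone demonstration of that rounding gap, assuming IEEE-754 doubles (53-bit significand, round-to-nearest-even) and a build with FP contraction disabled (e.g. -ffp-contract=off) so the separate form really rounds twice:

    #include <cassert>
    #include <cmath>

    int main() {
      double A = 0x1p27 + 1.0;             // 2^27 + 1
      double B = 0x1p27 - 1.0;             // 2^27 - 1; A*B == 2^54 - 1 exactly,
                                           // one bit too wide for a double
      double C = -0x1p54;
      double Fused    = std::fma(A, B, C); // no intermediate rounding: -1
      double Separate = A * B + C;         // A*B rounds up to 2^54 first: 0
      assert(Fused == -1.0 && Separate == 0.0);
    }
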
@@ -2128,8 +2128,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Constant *LHS, *RHS;
if (match(II->getArgOperand(0),
m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
- CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS});
- CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS});
+ CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
+ CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
return SelectInst::Create(Cond, Call0, Call1);
}
@@ -2147,7 +2147,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// fabs (fpext x) -> fpext (fabs x)
Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
{ ExtSrc->getType() });
- CallInst *NewFabs = Builder->CreateCall(F, ExtSrc);
+ CallInst *NewFabs = Builder.CreateCall(F, ExtSrc);
NewFabs->copyFastMathFlags(II);
NewFabs->takeName(II);
return new FPExtInst(NewFabs, II->getType());
@@ -2174,7 +2174,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn PPC lvx -> load if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
&DT) >= 16) {
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
}
@@ -2182,8 +2182,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_vsx_lxvw4x:
case Intrinsic::ppc_vsx_lxvd2x: {
// Turn PPC VSX loads into normal loads.
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
- PointerType::getUnqual(II->getType()));
+ Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr, Twine(""), false, 1);
}
case Intrinsic::ppc_altivec_stvx:
@@ -2193,7 +2193,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
&DT) >= 16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr);
}
break;
@@ -2201,18 +2201,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_vsx_stxvd2x: {
// Turn PPC VSX stores into normal stores.
Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
}
case Intrinsic::ppc_qpx_qvlfs:
// Turn PPC QPX qvlfs -> load if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
&DT) >= 16) {
- Type *VTy = VectorType::get(Builder->getFloatTy(),
+ Type *VTy = VectorType::get(Builder.getFloatTy(),
II->getType()->getVectorNumElements());
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(VTy));
- Value *Load = Builder->CreateLoad(Ptr);
+ Value *Load = Builder.CreateLoad(Ptr);
return new FPExtInst(Load, II->getType());
}
break;
@@ -2220,7 +2220,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn PPC QPX qvlfd -> load if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
&DT) >= 32) {
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
}
@@ -2229,11 +2229,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn PPC QPX qvstfs -> store if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
&DT) >= 16) {
- Type *VTy = VectorType::get(Builder->getFloatTy(),
+ Type *VTy = VectorType::get(Builder.getFloatTy(),
II->getArgOperand(0)->getType()->getVectorNumElements());
- Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
+ Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
Type *OpPtrTy = PointerType::getUnqual(VTy);
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(TOp, Ptr);
}
break;
@@ -2243,7 +2243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
&DT) >= 32) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr);
}
break;
@@ -2272,15 +2272,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
SmallVector<uint32_t, 8> SubVecMask;
for (unsigned i = 0; i != RetWidth; ++i)
SubVecMask.push_back((int)i);
- VectorHalfAsShorts = Builder->CreateShuffleVector(
+ VectorHalfAsShorts = Builder.CreateShuffleVector(
Arg, UndefValue::get(ArgType), SubVecMask);
}
auto VectorHalfType =
VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
auto VectorHalfs =
- Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType);
- auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType);
+ Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
+ auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
return replaceInstUsesWith(*II, VectorFloats);
}
@@ -2334,7 +2334,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_avx2_pmovmskb: {
- if (Value *V = simplifyX86movmsk(*II, *Builder))
+ if (Value *V = simplifyX86movmsk(*II))
return replaceInstUsesWith(*II, V);
break;
}
@@ -2437,25 +2437,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
default: llvm_unreachable("Case stmts out of sync!");
case Intrinsic::x86_avx512_mask_add_ps_512:
case Intrinsic::x86_avx512_mask_add_pd_512:
- V = Builder->CreateFAdd(Arg0, Arg1);
+ V = Builder.CreateFAdd(Arg0, Arg1);
break;
case Intrinsic::x86_avx512_mask_sub_ps_512:
case Intrinsic::x86_avx512_mask_sub_pd_512:
- V = Builder->CreateFSub(Arg0, Arg1);
+ V = Builder.CreateFSub(Arg0, Arg1);
break;
case Intrinsic::x86_avx512_mask_mul_ps_512:
case Intrinsic::x86_avx512_mask_mul_pd_512:
- V = Builder->CreateFMul(Arg0, Arg1);
+ V = Builder.CreateFMul(Arg0, Arg1);
break;
case Intrinsic::x86_avx512_mask_div_ps_512:
case Intrinsic::x86_avx512_mask_div_pd_512:
- V = Builder->CreateFDiv(Arg0, Arg1);
+ V = Builder.CreateFDiv(Arg0, Arg1);
break;
}
// Create a select for the masking.
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
- *Builder);
+ Builder);
return replaceInstUsesWith(*II, V);
}
}
@@ -2476,27 +2476,27 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Extract the element as scalars.
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
- Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0);
- Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0);
+ Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
+ Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
Value *V;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Case stmts out of sync!");
case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
- V = Builder->CreateFAdd(LHS, RHS);
+ V = Builder.CreateFAdd(LHS, RHS);
break;
case Intrinsic::x86_avx512_mask_sub_ss_round:
case Intrinsic::x86_avx512_mask_sub_sd_round:
- V = Builder->CreateFSub(LHS, RHS);
+ V = Builder.CreateFSub(LHS, RHS);
break;
case Intrinsic::x86_avx512_mask_mul_ss_round:
case Intrinsic::x86_avx512_mask_mul_sd_round:
- V = Builder->CreateFMul(LHS, RHS);
+ V = Builder.CreateFMul(LHS, RHS);
break;
case Intrinsic::x86_avx512_mask_div_ss_round:
case Intrinsic::x86_avx512_mask_div_sd_round:
- V = Builder->CreateFDiv(LHS, RHS);
+ V = Builder.CreateFDiv(LHS, RHS);
break;
}
@@ -2506,18 +2506,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// We don't need a select if we know the mask bit is a 1.
if (!C || !C->getValue()[0]) {
// Cast the mask to an i1 vector and then extract the lowest element.
- auto *MaskTy = VectorType::get(Builder->getInt1Ty(),
+ auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
cast<IntegerType>(Mask->getType())->getBitWidth());
- Mask = Builder->CreateBitCast(Mask, MaskTy);
- Mask = Builder->CreateExtractElement(Mask, (uint64_t)0);
+ Mask = Builder.CreateBitCast(Mask, MaskTy);
+ Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
// Extract the lowest element from the passthru operand.
- Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2),
+ Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
(uint64_t)0);
- V = Builder->CreateSelect(Mask, V, Passthru);
+ V = Builder.CreateSelect(Mask, V, Passthru);
}
// Insert the result back into the original argument 0.
- V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0);
+ V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
return replaceInstUsesWith(*II, V);
}
@@ -2598,7 +2598,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx512_pslli_d_512:
case Intrinsic::x86_avx512_pslli_q_512:
case Intrinsic::x86_avx512_pslli_w_512:
- if (Value *V = simplifyX86immShift(*II, *Builder))
+ if (Value *V = simplifyX86immShift(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -2629,7 +2629,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
case Intrinsic::x86_avx512_psll_w_512: {
- if (Value *V = simplifyX86immShift(*II, *Builder))
+ if (Value *V = simplifyX86immShift(*II, Builder))
return replaceInstUsesWith(*II, V);
// SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
@@ -2673,7 +2673,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx512_psrlv_w_128:
case Intrinsic::x86_avx512_psrlv_w_256:
case Intrinsic::x86_avx512_psrlv_w_512:
- if (Value *V = simplifyX86varShift(*II, *Builder))
+ if (Value *V = simplifyX86varShift(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -2683,7 +2683,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_pmulu_dq:
case Intrinsic::x86_avx512_pmul_dq_512:
case Intrinsic::x86_avx512_pmulu_dq_512: {
- if (Value *V = simplifyX86muldq(*II, *Builder))
+ if (Value *V = simplifyX86muldq(*II, Builder))
return replaceInstUsesWith(*II, V);
unsigned VWidth = II->getType()->getVectorNumElements();
@@ -2703,7 +2703,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_packsswb:
case Intrinsic::x86_avx512_packssdw_512:
case Intrinsic::x86_avx512_packsswb_512:
- if (Value *V = simplifyX86pack(*II, *this, *Builder, true))
+ if (Value *V = simplifyX86pack(*II, true))
return replaceInstUsesWith(*II, V);
break;
@@ -2713,7 +2713,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_packuswb:
case Intrinsic::x86_avx512_packusdw_512:
case Intrinsic::x86_avx512_packuswb_512:
- if (Value *V = simplifyX86pack(*II, *this, *Builder, false))
+ if (Value *V = simplifyX86pack(*II, false))
return replaceInstUsesWith(*II, V);
break;
@@ -2756,7 +2756,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::x86_sse41_insertps:
- if (Value *V = simplifyX86insertps(*II, *Builder))
+ if (Value *V = simplifyX86insertps(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -2779,7 +2779,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
: nullptr;
// Attempt to simplify to a constant, shuffle vector or EXTRQI call.
- if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
+ if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
return replaceInstUsesWith(*II, V);
// EXTRQ only uses the lowest 64-bits of the first 128-bit vector
@@ -2811,7 +2811,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
// Attempt to simplify to a constant or shuffle vector.
- if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
+ if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
return replaceInstUsesWith(*II, V);
// EXTRQI only uses the lowest 64-bits of the first 128-bit vector
@@ -2843,7 +2843,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
const APInt &V11 = CI11->getValue();
APInt Len = V11.zextOrTrunc(6);
APInt Idx = V11.lshr(8).zextOrTrunc(6);
- if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
+ if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
return replaceInstUsesWith(*II, V);
}
@@ -2876,7 +2876,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (CILength && CIIndex) {
APInt Len = CILength->getValue().zextOrTrunc(6);
APInt Idx = CIIndex->getValue().zextOrTrunc(6);
- if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
+ if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
return replaceInstUsesWith(*II, V);
}
@@ -2930,7 +2930,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
case Intrinsic::x86_avx512_pshuf_b_512:
- if (Value *V = simplifyX86pshufb(*II, *Builder))
+ if (Value *V = simplifyX86pshufb(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -2940,13 +2940,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:
- if (Value *V = simplifyX86vpermilvar(*II, *Builder))
+ if (Value *V = simplifyX86vpermilvar(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
- if (Value *V = simplifyX86vpermv(*II, *Builder))
+ if (Value *V = simplifyX86vpermv(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -2964,10 +2964,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx512_mask_permvar_sf_512:
case Intrinsic::x86_avx512_mask_permvar_si_256:
case Intrinsic::x86_avx512_mask_permvar_si_512:
- if (Value *V = simplifyX86vpermv(*II, *Builder)) {
+ if (Value *V = simplifyX86vpermv(*II, Builder)) {
// We simplified the permuting, now create a select for the masking.
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
- *Builder);
+ Builder);
return replaceInstUsesWith(*II, V);
}
break;
@@ -2976,7 +2976,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx_vperm2f128_ps_256:
case Intrinsic::x86_avx_vperm2f128_si_256:
case Intrinsic::x86_avx2_vperm2i128:
- if (Value *V = simplifyX86vperm2(*II, *Builder))
+ if (Value *V = simplifyX86vperm2(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -3009,7 +3009,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_xop_vpcomd:
case Intrinsic::x86_xop_vpcomq:
case Intrinsic::x86_xop_vpcomw:
- if (Value *V = simplifyX86vpcom(*II, *Builder, true))
+ if (Value *V = simplifyX86vpcom(*II, Builder, true))
return replaceInstUsesWith(*II, V);
break;
@@ -3017,7 +3017,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_xop_vpcomud:
case Intrinsic::x86_xop_vpcomuq:
case Intrinsic::x86_xop_vpcomuw:
- if (Value *V = simplifyX86vpcom(*II, *Builder, false))
+ if (Value *V = simplifyX86vpcom(*II, Builder, false))
return replaceInstUsesWith(*II, V);
break;
@@ -3044,10 +3044,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (AllEltsOk) {
// Cast the input vectors to byte vectors.
- Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
- Mask->getType());
- Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
- Mask->getType());
+ Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
+ Mask->getType());
+ Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
+ Mask->getType());
Value *Result = UndefValue::get(Op0->getType());
// Only extract each element once.
@@ -3067,13 +3067,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
- Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
- Builder->getInt32(Idx&15));
+ Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
+ Builder.getInt32(Idx&15));
}
// Insert this value into the result vector.
- Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
- Builder->getInt32(i));
+ Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
+ Builder.getInt32(i));
}
return CastInst::Create(Instruction::BitCast, Result, CI.getType());
}
@@ -3238,7 +3238,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Mask == (S_NAN | Q_NAN)) {
// Equivalent of isnan. Replace with standard fcmp.
- Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0);
+ Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
FCmp->takeName(II);
return replaceInstUsesWith(*II, FCmp);
}
@@ -3250,7 +3250,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Clamp mask to used bits
if ((Mask & FullMask) != Mask) {
- CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+ CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
{ Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
);
@@ -3343,13 +3343,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// TODO: Also emit sub if only width is constant.
if (!CWidth && COffset && Offset == 0) {
Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
- Value *ShiftVal = Builder->CreateSub(KSize, II->getArgOperand(2));
- ShiftVal = Builder->CreateZExt(ShiftVal, II->getType());
+ Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
+ ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
- Value *Shl = Builder->CreateShl(Src, ShiftVal);
- Value *RightShift = Signed ?
- Builder->CreateAShr(Shl, ShiftVal) :
- Builder->CreateLShr(Shl, ShiftVal);
+ Value *Shl = Builder.CreateShl(Src, ShiftVal);
+ Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
+ : Builder.CreateLShr(Shl, ShiftVal);
RightShift->takeName(II);
return replaceInstUsesWith(*II, RightShift);
}
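
The bfe lowering above extracts a bitfield with two shifts: shift the field up against the top bit, then shift back down, using an arithmetic right shift to sign-extend the field or a logical one to zero-extend it. A standalone sketch of the 32-bit case, assuming the usual arithmetic right shift for signed integers:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int Bits = 32, Off = 8, Width = 8;
      uint32_t X = 0x0000ab00u;                  // the [8,16) field holds 0xab
      uint32_t Shl = X << (Bits - Off - Width);  // field now at the top
      uint32_t U = Shl >> (Bits - Width);           // lshr: zero-extends
      int32_t  S = int32_t(Shl) >> (Bits - Width);  // ashr: sign-extends
      assert(U == 0xabu);
      assert(S == -85);                          // 0xab reread as a signed i8
    }
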
@@ -3360,17 +3359,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// TODO: This allows folding to undef when the hardware has specific
// behavior?
if (Offset + Width < IntSize) {
- Value *Shl = Builder->CreateShl(Src, IntSize - Offset - Width);
- Value *RightShift = Signed ?
- Builder->CreateAShr(Shl, IntSize - Width) :
- Builder->CreateLShr(Shl, IntSize - Width);
+ Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
+ Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
+ : Builder.CreateLShr(Shl, IntSize - Width);
RightShift->takeName(II);
return replaceInstUsesWith(*II, RightShift);
}
- Value *RightShift = Signed ?
- Builder->CreateAShr(Src, Offset) :
- Builder->CreateLShr(Src, Offset);
+ Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
+ : Builder.CreateLShr(Src, Offset);
RightShift->takeName(II);
return replaceInstUsesWith(*II, RightShift);
@@ -3439,7 +3436,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
- CallInst *NewCall = Builder->CreateMinNum(Src0, Src1);
+ CallInst *NewCall = Builder.CreateMinNum(Src0, Src1);
NewCall->copyFastMathFlags(II);
NewCall->takeName(II);
return replaceInstUsesWith(*II, NewCall);
@@ -3451,7 +3448,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
C2->getValueAPF());
return replaceInstUsesWith(*II,
- ConstantFP::get(Builder->getContext(), Result));
+ ConstantFP::get(Builder.getContext(), Result));
}
}
}
@@ -3494,7 +3491,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
MDNode *MD = MDNode::get(II->getContext(), MDArgs);
Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
- CallInst *NewCall = Builder->CreateCall(NewF, Args);
+ CallInst *NewCall = Builder.CreateCall(NewF, Args);
NewCall->addAttribute(AttributeList::FunctionIndex,
Attribute::Convergent);
NewCall->takeName(II);
@@ -3556,7 +3553,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
SrcLHS->getType());
Value *Args[] = { SrcLHS, SrcRHS,
ConstantInt::get(CC->getType(), SrcPred) };
- CallInst *NewCall = Builder->CreateCall(NewF, Args);
+ CallInst *NewCall = Builder.CreateCall(NewF, Args);
NewCall->takeName(II);
return replaceInstUsesWith(*II, NewCall);
}
@@ -3633,16 +3630,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// the InstCombineIRInserter object.
Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
- Builder->CreateCall(AssumeIntrinsic, A, II->getName());
- Builder->CreateCall(AssumeIntrinsic, B, II->getName());
+ Builder.CreateCall(AssumeIntrinsic, A, II->getName());
+ Builder.CreateCall(AssumeIntrinsic, B, II->getName());
return eraseInstFromFunction(*II);
}
// assume(!(a || b)) -> assume(!a); assume(!b);
if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
- Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),
- II->getName());
- Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),
- II->getName());
+ Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
+ Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
return eraseInstFromFunction(*II);
}
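
Both assume splits are ordinary boolean algebra: a && b holds iff each conjunct holds, and by De Morgan !(a || b) is !a && !b, so one assumption becomes two independent, more usable facts. A standalone check of the second identity:

    #include <cassert>

    int main() {
      for (bool A : {false, true})
        for (bool B : {false, true})
          assert(!(A || B) == (!A && !B));
    }
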
@@ -3726,7 +3721,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return eraseInstFromFunction(*NextInst);
// Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
- II->setArgOperand(0, Builder->CreateAnd(CurrCond, NextCond));
+ II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
return eraseInstFromFunction(*NextInst);
}
break;
@@ -4163,7 +4158,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Value *NewArg = *AI;
if ((*AI)->getType() != ParamTy)
- NewArg = Builder->CreateBitOrPointerCast(*AI, ParamTy);
+ NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
Args.push_back(NewArg);
// Add any parameter attributes.
@@ -4189,7 +4184,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Must promote to pass through va_arg area!
Instruction::CastOps opcode =
CastInst::getCastOpcode(*AI, false, PTy, false);
- NewArg = Builder->CreateCast(opcode, *AI, PTy);
+ NewArg = Builder.CreateCast(opcode, *AI, PTy);
}
Args.push_back(NewArg);
@@ -4215,10 +4210,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NewCS = Builder->CreateInvoke(Callee, II->getNormalDest(),
- II->getUnwindDest(), Args, OpBundles);
+ NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
+ II->getUnwindDest(), Args, OpBundles);
} else {
- NewCS = Builder->CreateCall(Callee, Args, OpBundles);
+ NewCS = Builder.CreateCall(Callee, Args, OpBundles);
cast<CallInst>(NewCS.getInstruction())
->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
}
@@ -4328,7 +4323,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Add the chain argument and attributes.
Value *NestVal = Tramp->getArgOperand(2);
if (NestVal->getType() != NestTy)
- NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
+ NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
NewArgs.push_back(NestVal);
NewArgAttrs.push_back(NestAttr);
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index d3049389dfb9f..dfdfd3e9da840 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -84,7 +84,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
PointerType *PTy = cast<PointerType>(CI.getType());
- BuilderTy AllocaBuilder(*Builder);
+ BuilderTy AllocaBuilder(Builder);
AllocaBuilder.SetInsertPoint(&AI);
// Get the type really allocated and the type casted to.
@@ -406,8 +406,7 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
/// --->
/// extractelement <4 x i32> %X, 1
-static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
- const DataLayout &DL) {
+static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) {
Value *TruncOp = Trunc.getOperand(0);
Type *DestType = Trunc.getType();
if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
@@ -434,14 +433,14 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
unsigned NumVecElts = VecWidth / DestWidth;
if (VecType->getElementType() != DestType) {
VecType = VectorType::get(DestType, NumVecElts);
- VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc");
+ VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
}
unsigned Elt = ShiftAmount / DestWidth;
- if (DL.isBigEndian())
+ if (IC.getDataLayout().isBigEndian())
Elt = NumVecElts - 1 - Elt;
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}
/// Try to narrow the width of bitwise logic instructions with constants.
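
The endian flip in foldVecTruncToExtElt exists because trunc(lshr(x, 32)) names bits of the value, while extractelement names a lane of the in-memory layout; which lane holds the high bits depends on byte order. A standalone illustration of the same correspondence on a 64-bit scalar viewed as two 32-bit lanes:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint64_t X = 0x1122334455667788ull;
      uint32_t Lanes[2];
      std::memcpy(Lanes, &X, sizeof(X));
      uint32_t High = uint32_t(X >> 32);        // 0x11223344
      // Little-endian: the high half is lane 1; big-endian: lane 0,
      // hence the NumVecElts - 1 - Elt adjustment in the patch.
      bool Little = (Lanes[0] == uint32_t(X));
      assert(Lanes[Little ? 1 : 0] == High);
    }
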
@@ -460,7 +459,7 @@ Instruction *InstCombiner::shrinkBitwiseLogic(TruncInst &Trunc) {
// trunc (logic X, C) --> logic (trunc X, C')
Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
- Value *NarrowOp0 = Builder->CreateTrunc(LogicOp->getOperand(0), DestTy);
+ Value *NarrowOp0 = Builder.CreateTrunc(LogicOp->getOperand(0), DestTy);
return BinaryOperator::Create(LogicOp->getOpcode(), NarrowOp0, NarrowC);
}
@@ -554,7 +553,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
if (DestTy->getScalarSizeInBits() == 1) {
Constant *One = ConstantInt::get(SrcTy, 1);
- Src = Builder->CreateAnd(Src, One);
+ Src = Builder.CreateAnd(Src, One);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
@@ -580,7 +579,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// Since we're doing an lshr and a zero extend, and know that the shift
// amount is smaller than ASize, it is always safe to do the shift in A's
// type, then zero extend or truncate to the result.
- Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Value *Shift = Builder.CreateLShr(A, Cst->getZExtValue());
Shift->takeName(Src);
return CastInst::CreateIntegerCast(Shift, DestTy, false);
}
@@ -610,7 +609,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return BinaryOperator::CreateAShr(A, ConstantInt::get(CI.getType(),
std::min(ShiftAmt, ASize - 1)));
if (SExt->hasOneUse()) {
- Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1));
+ Value *Shift = Builder.CreateAShr(A, std::min(ShiftAmt, ASize - 1));
Shift->takeName(Src);
return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
}
@@ -620,10 +619,10 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *I = shrinkBitwiseLogic(CI))
return I;
- if (Instruction *I = shrinkSplatShuffle(CI, *Builder))
+ if (Instruction *I = shrinkSplatShuffle(CI, Builder))
return I;
- if (Instruction *I = shrinkInsertElt(CI, *Builder))
+ if (Instruction *I = shrinkInsertElt(CI, Builder))
return I;
if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
@@ -636,7 +635,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// FoldShiftByConstant and is the extend in reg pattern.
const unsigned DestSize = DestTy->getScalarSizeInBits();
if (Cst->getValue().ult(DestSize)) {
- Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr");
+ Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
return BinaryOperator::Create(
Instruction::Shl, NewTrunc,
@@ -645,7 +644,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
}
}
- if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
+ if (Instruction *I = foldVecTruncToExtElt(CI, *this))
return I;
return nullptr;
@@ -668,13 +667,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getScalarSizeInBits() - 1);
- In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit");
+ In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
if (In->getType() != CI.getType())
- In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/);
+ In = Builder.CreateIntCast(In, CI.getType(), false /*ZExt*/);
if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One, In->getName() + ".not");
+ In = Builder.CreateXor(In, One, In->getName() + ".not");
}
return replaceInstUsesWith(CI, In);
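
The transform above turns a sign test plus zero-extension into a single logical shift: zext(x < 0) is exactly the sign bit of x moved down to bit 0. A standalone check on i32:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X : {0, 1, -1, INT32_MIN, INT32_MAX})
        assert(uint32_t(X < 0) == (uint32_t(X) >> 31));
    }
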
@@ -713,19 +712,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
if (ShAmt) {
// Perform a logical shr by shiftamt.
// Insert the shift to put the result in the low bit.
- In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
- In->getName() + ".lobit");
+ In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
+ In->getName() + ".lobit");
}
if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One);
+ In = Builder.CreateXor(In, One);
}
if (CI.getType() == In->getType())
return replaceInstUsesWith(CI, In);
- Value *IntCast = Builder->CreateIntCast(In, CI.getType(), false);
+ Value *IntCast = Builder.CreateIntCast(In, CI.getType(), false);
return replaceInstUsesWith(CI, IntCast);
}
}
@@ -748,19 +747,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
if (UnknownBit.countPopulation() == 1) {
if (!DoTransform) return ICI;
- Value *Result = Builder->CreateXor(LHS, RHS);
+ Value *Result = Builder.CreateXor(LHS, RHS);
// Mask off any bits that are set and won't be shifted away.
if (KnownLHS.One.uge(UnknownBit))
- Result = Builder->CreateAnd(Result,
+ Result = Builder.CreateAnd(Result,
ConstantInt::get(ITy, UnknownBit));
// Shift the bit we're testing down to the lsb.
- Result = Builder->CreateLShr(
+ Result = Builder.CreateLShr(
Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
- Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
+ Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
Result->takeName(ICI);
return replaceInstUsesWith(CI, Result);
}
@@ -960,7 +959,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (SrcSize < DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
- Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
return new ZExtInst(And, CI.getType());
}
@@ -970,7 +969,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
AndValue));
}
if (SrcSize > DstSize) {
- Value *Trunc = Builder->CreateTrunc(A, CI.getType());
+ Value *Trunc = Builder.CreateTrunc(A, CI.getType());
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
return BinaryOperator::CreateAnd(Trunc,
ConstantInt::get(Trunc->getType(),
@@ -992,8 +991,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
(transformZExtICmp(LHS, CI, false) ||
transformZExtICmp(RHS, CI, false))) {
// zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
- Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
- Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
+ Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
+ Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
BinaryOperator *Or = BinaryOperator::Create(Instruction::Or, LCast, RCast);
// Perform the elimination.
@@ -1020,7 +1019,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
X->getType() == CI.getType()) {
Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
- return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC);
+ return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
}
return nullptr;
@@ -1043,12 +1042,12 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
Value *Sh = ConstantInt::get(Op0->getType(),
Op0->getType()->getScalarSizeInBits()-1);
- Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
+ Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
if (In->getType() != CI.getType())
- In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/);
+ In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/);
if (Pred == ICmpInst::ICMP_SGT)
- In = Builder->CreateNot(In, In->getName()+".not");
+ In = Builder.CreateNot(In, In->getName() + ".not");
return replaceInstUsesWith(CI, In);
}
}
@@ -1079,26 +1078,26 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
// Perform a right shift to place the desired bit in the LSB.
if (ShiftAmt)
- In = Builder->CreateLShr(In,
- ConstantInt::get(In->getType(), ShiftAmt));
+ In = Builder.CreateLShr(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
// At this point "In" is either 1 or 0. Subtract 1 to turn
// {1, 0} -> {0, -1}.
- In = Builder->CreateAdd(In,
- ConstantInt::getAllOnesValue(In->getType()),
- "sext");
+ In = Builder.CreateAdd(In,
+ ConstantInt::getAllOnesValue(In->getType()),
+ "sext");
} else {
// sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
// sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
// Perform a left shift to place the desired bit in the MSB.
if (ShiftAmt)
- In = Builder->CreateShl(In,
- ConstantInt::get(In->getType(), ShiftAmt));
+ In = Builder.CreateShl(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
// Distribute the bit over the whole bit width.
- In = Builder->CreateAShr(In, ConstantInt::get(In->getType(),
- KnownZeroMask.getBitWidth() - 1), "sext");
+ In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
+ KnownZeroMask.getBitWidth() - 1), "sext");
}
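
As a standalone illustration of the single-bit sext fold above — a sketch assuming the tested mask is the single bit 2^N, with an illustrative helper name:

  // sext ((X & 2^N) != 0)  -->  (X << (bw-1-N)) ashr (bw-1)
  // The left shift parks the tested bit in the sign position; the
  // arithmetic shift then smears it across the whole width (0 or -1).
  llvm::Value *sextBitTest(llvm::IRBuilder<> &B, llvm::Value *X, unsigned N) {
    unsigned BW = X->getType()->getScalarSizeInBits();
    llvm::Value *Shl = B.CreateShl(X, BW - 1 - N);
    return B.CreateAShr(Shl, BW - 1, "sext");
  }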
if (CI.getType() == In->getType())
@@ -1191,7 +1190,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// instead.
KnownBits Known = computeKnownBits(Src, 0, &CI);
if (Known.isNonNegative()) {
- Value *ZExt = Builder->CreateZExt(Src, DestTy);
+ Value *ZExt = Builder.CreateZExt(Src, DestTy);
return replaceInstUsesWith(CI, ZExt);
}
@@ -1217,7 +1216,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// We need to emit a shl + ashr to do the sign extend.
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
- return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
+ return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
ShAmt);
}
@@ -1229,7 +1228,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
- return BinaryOperator::CreateAShr(Builder->CreateShl(X, ShAmt), ShAmt);
+ return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
}
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
@@ -1258,7 +1257,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
- A = Builder->CreateShl(A, ShAmtV, CI.getName());
+ A = Builder.CreateShl(A, ShAmtV, CI.getName());
return BinaryOperator::CreateAShr(A, ShAmtV);
}
@@ -1347,9 +1346,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// case of interest here is (float)((double)float + float)).
if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
if (LHSOrig->getType() != CI.getType())
- LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType());
if (RHSOrig->getType() != CI.getType())
- RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType());
Instruction *RI =
BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig);
RI->copyFastMathFlags(OpI);
@@ -1364,9 +1363,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// in the destination format if it can represent both sources.
if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
if (LHSOrig->getType() != CI.getType())
- LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType());
if (RHSOrig->getType() != CI.getType())
- RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType());
Instruction *RI =
BinaryOperator::CreateFMul(LHSOrig, RHSOrig);
RI->copyFastMathFlags(OpI);
@@ -1382,9 +1381,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// TODO: Tighten bound via rigorous analysis of the unbalanced case.
if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
if (LHSOrig->getType() != CI.getType())
- LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType());
if (RHSOrig->getType() != CI.getType())
- RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType());
Instruction *RI =
BinaryOperator::CreateFDiv(LHSOrig, RHSOrig);
RI->copyFastMathFlags(OpI);
@@ -1399,11 +1398,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (SrcWidth == OpWidth)
break;
if (LHSWidth < SrcWidth)
- LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType());
+ LHSOrig = Builder.CreateFPExt(LHSOrig, RHSOrig->getType());
else if (RHSWidth <= SrcWidth)
- RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType());
+ RHSOrig = Builder.CreateFPExt(RHSOrig, LHSOrig->getType());
if (LHSOrig != OpI->getOperand(0) || RHSOrig != OpI->getOperand(1)) {
- Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig);
+ Value *ExactResult = Builder.CreateFRem(LHSOrig, RHSOrig);
if (Instruction *RI = dyn_cast<Instruction>(ExactResult))
RI->copyFastMathFlags(OpI);
return CastInst::CreateFPCast(ExactResult, CI.getType());
@@ -1412,8 +1411,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// (fptrunc (fneg x)) -> (fneg (fptrunc x))
if (BinaryOperator::isFNeg(OpI)) {
- Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1),
- CI.getType());
+ Value *InnerTrunc = Builder.CreateFPTrunc(OpI->getOperand(1),
+ CI.getType());
Instruction *RI = BinaryOperator::CreateFNeg(InnerTrunc);
RI->copyFastMathFlags(OpI);
return RI;
@@ -1432,10 +1431,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
(isa<ConstantFP>(SI->getOperand(1)) ||
isa<ConstantFP>(SI->getOperand(2))) &&
matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) {
- Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1),
- CI.getType());
- Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2),
- CI.getType());
+ Value *LHSTrunc = Builder.CreateFPTrunc(SI->getOperand(1), CI.getType());
+ Value *RHSTrunc = Builder.CreateFPTrunc(SI->getOperand(2), CI.getType());
return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc);
}
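
The select fold in this hunk, reduced to its essentials — a sketch under the same precondition the code checks (at least one constant arm, so no instructions are duplicated); the helper is hypothetical:

  // fptrunc (select C, A, B)  -->  select C, (fptrunc A), (fptrunc B)
  llvm::Value *truncSelect(llvm::IRBuilder<> &B, llvm::SelectInst *SI,
                           llvm::Type *DestTy) {
    llvm::Value *T = B.CreateFPTrunc(SI->getTrueValue(), DestTy);
    llvm::Value *F = B.CreateFPTrunc(SI->getFalseValue(), DestTy);
    return B.CreateSelect(SI->getCondition(), T, F);
  }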
@@ -1465,7 +1462,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// Do unary FP operation on smaller type.
// (fptrunc (fabs x)) -> (fabs (fptrunc x))
- Value *InnerTrunc = Builder->CreateFPTrunc(Src, CI.getType());
+ Value *InnerTrunc = Builder.CreateFPTrunc(Src, CI.getType());
Type *IntrinsicType[] = { CI.getType() };
Function *Overload = Intrinsic::getDeclaration(
CI.getModule(), II->getIntrinsicID(), IntrinsicType);
@@ -1482,7 +1479,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
}
- if (Instruction *I = shrinkInsertElt(CI, *Builder))
+ if (Instruction *I = shrinkInsertElt(CI, Builder))
return I;
return nullptr;
@@ -1577,7 +1574,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
return new IntToPtrInst(P, CI.getType());
}
@@ -1627,7 +1624,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
if (Ty->isVectorTy()) // Handle vectors of pointers.
PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy);
+ Value *P = Builder.CreatePtrToInt(CI.getOperand(0), PtrTy);
return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
@@ -1653,7 +1650,7 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
return nullptr;
SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
- InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+ InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
}
// Now that the element types match, get the shuffle mask and RHS of the
@@ -1833,8 +1830,8 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
if (!Elements[i]) continue; // Unset element.
- Result = IC.Builder->CreateInsertElement(Result, Elements[i],
- IC.Builder->getInt32(i));
+ Result = IC.Builder.CreateInsertElement(Result, Elements[i],
+ IC.Builder.getInt32(i));
}
return Result;
@@ -1845,8 +1842,7 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
/// vectors better than bitcasts of scalars because vector registers are
/// usually not type-specific like scalar integer or scalar floating-point.
static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
- InstCombiner &IC,
- const DataLayout &DL) {
+ InstCombiner &IC) {
// TODO: Create and use a pattern matcher for ExtractElementInst.
auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
if (!ExtElt || !ExtElt->hasOneUse())
@@ -1860,8 +1856,8 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements();
auto *NewVecType = VectorType::get(DestType, NumElts);
- auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(),
- NewVecType, "bc");
+ auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(),
+ NewVecType, "bc");
return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
}
@@ -1870,7 +1866,7 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
InstCombiner::BuilderTy &Builder) {
Type *DestTy = BitCast.getType();
BinaryOperator *BO;
- if (!DestTy->getScalarType()->isIntegerTy() ||
+ if (!DestTy->isIntOrIntVectorTy() ||
!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
!BO->isBitwiseLogicOp())
return nullptr;
@@ -2033,8 +2029,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
// For each old PHI node, create a corresponding new PHI node with a type A.
SmallDenseMap<PHINode *, PHINode *> NewPNodes;
for (auto *OldPN : OldPhiNodes) {
- Builder->SetInsertPoint(OldPN);
- PHINode *NewPN = Builder->CreatePHI(DestTy, OldPN->getNumOperands());
+ Builder.SetInsertPoint(OldPN);
+ PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
NewPNodes[OldPN] = NewPN;
}
@@ -2047,8 +2043,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
if (auto *C = dyn_cast<Constant>(V)) {
NewV = ConstantExpr::getBitCast(C, DestTy);
} else if (auto *LI = dyn_cast<LoadInst>(V)) {
- Builder->SetInsertPoint(LI->getNextNode());
- NewV = Builder->CreateBitCast(LI, DestTy);
+ Builder.SetInsertPoint(LI->getNextNode());
+ NewV = Builder.CreateBitCast(LI, DestTy);
Worklist.Add(LI);
} else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
NewV = BCI->getOperand(0);
@@ -2064,9 +2060,9 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
for (User *U : PN->users()) {
auto *SI = dyn_cast<StoreInst>(U);
if (SI && SI->isSimple() && SI->getOperand(0) == PN) {
- Builder->SetInsertPoint(SI);
+ Builder.SetInsertPoint(SI);
auto *NewBC =
- cast<BitCastInst>(Builder->CreateBitCast(NewPNodes[PN], SrcTy));
+ cast<BitCastInst>(Builder.CreateBitCast(NewPNodes[PN], SrcTy));
SI->setOperand(0, NewBC);
Worklist.Add(SI);
assert(hasStoreUsersOnly(*NewBC));
@@ -2121,14 +2117,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
- SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder->getInt32(0));
+ SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
- Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+ Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
@@ -2161,7 +2157,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// scalar-scalar cast.
if (!DestTy->isVectorTy()) {
Value *Elem =
- Builder->CreateExtractElement(Src,
+ Builder.CreateExtractElement(Src,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
return CastInst::Create(Instruction::BitCast, Elem, DestTy);
}
@@ -2190,8 +2186,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Tmp->getOperand(0)->getType() == DestTy) ||
((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
Tmp->getOperand(0)->getType() == DestTy)) {
- Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
- Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
+ Value *LHS = Builder.CreateBitCast(SVI->getOperand(0), DestTy);
+ Value *RHS = Builder.CreateBitCast(SVI->getOperand(1), DestTy);
// Return a new shuffle vector. Use the same element IDs, as we
// know the vector types have the same number of elements.

return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
@@ -2204,13 +2200,13 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
return I;
- if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+ if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
return I;
- if (Instruction *I = foldBitCastBitwiseLogic(CI, *Builder))
+ if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
return I;
- if (Instruction *I = foldBitCastSelect(CI, *Builder))
+ if (Instruction *I = foldBitCastSelect(CI, Builder))
return I;
if (SrcTy->isPointerTy())
@@ -2234,7 +2230,7 @@ Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
MidTy = VectorType::get(MidTy, VT->getNumElements());
}
- Value *NewBitCast = Builder->CreateBitCast(Src, MidTy);
+ Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
return new AddrSpaceCastInst(NewBitCast, CI.getType());
}
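
The pattern repeated throughout the hunks above is purely mechanical: the InstCombiner's Builder member changes from a pointer to a reference, so every Builder->Foo(...) becomes Builder.Foo(...). In miniature — an illustrative pair of structs, not code from the tree:

  #include "llvm/IR/IRBuilder.h"

  // Before this patch: the builder was held by pointer and dereferenced,
  // and nothing in the type ruled out a null builder.
  struct OldStyle {
    llvm::IRBuilder<> *Builder;
    llvm::Value *notOf(llvm::Value *V) { return Builder->CreateNot(V); }
  };

  // After: a reference member makes "always non-null" part of the type.
  struct NewStyle {
    llvm::IRBuilder<> &Builder;
    explicit NewStyle(llvm::IRBuilder<> &B) : Builder(B) {}
    llvm::Value *notOf(llvm::Value *V) { return Builder.CreateNot(V); }
  };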
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 58b8b2f526299..60d1cde971dd4 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -392,7 +392,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
- Idx = Builder->CreateTrunc(Idx, IntPtrTy);
+ Idx = Builder.CreateTrunc(Idx, IntPtrTy);
}
// If the comparison is only true for one or two elements, emit direct
@@ -400,7 +400,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
if (SecondTrueElement != Overdefined) {
// None true -> false.
if (FirstTrueElement == Undefined)
- return replaceInstUsesWith(ICI, Builder->getFalse());
+ return replaceInstUsesWith(ICI, Builder.getFalse());
Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
@@ -409,9 +409,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
// True for two elements -> 'i == 47 | i == 72'.
- Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
+ Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
- Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
+ Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
return BinaryOperator::CreateOr(C1, C2);
}
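
The two-element case above is the classic table-lookup strength reduction: a compare of a load from a constant array collapses to a disjunction of index tests. A minimal sketch mirroring the code just shown (helper name is illustrative):

  // Only indices First and Second satisfy the compare, so emit
  //   (i == First) | (i == Second)   instead of load+icmp.
  llvm::Value *twoTrueElements(llvm::IRBuilder<> &B, llvm::Value *Idx,
                               uint64_t First, uint64_t Second) {
    llvm::Type *Ty = Idx->getType();
    llvm::Value *C1 = B.CreateICmpEQ(Idx, llvm::ConstantInt::get(Ty, First));
    llvm::Value *C2 = B.CreateICmpEQ(Idx, llvm::ConstantInt::get(Ty, Second));
    return B.CreateOr(C1, C2);
  }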
@@ -420,7 +420,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
if (SecondFalseElement != Overdefined) {
// None false -> true.
if (FirstFalseElement == Undefined)
- return replaceInstUsesWith(ICI, Builder->getTrue());
+ return replaceInstUsesWith(ICI, Builder.getTrue());
Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
@@ -429,9 +429,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
// False for two elements -> 'i != 47 & i != 72'.
- Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
+ Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
- Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
+ Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
return BinaryOperator::CreateAnd(C1, C2);
}
@@ -443,7 +443,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
if (FirstTrueElement) {
Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
- Idx = Builder->CreateAdd(Idx, Offs);
+ Idx = Builder.CreateAdd(Idx, Offs);
}
Value *End = ConstantInt::get(Idx->getType(),
@@ -457,7 +457,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
// Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
if (FirstFalseElement) {
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
- Idx = Builder->CreateAdd(Idx, Offs);
+ Idx = Builder.CreateAdd(Idx, Offs);
}
Value *End = ConstantInt::get(Idx->getType(),
@@ -481,9 +481,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
if (Ty) {
- Value *V = Builder->CreateIntCast(Idx, Ty, false);
- V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
- V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ Value *V = Builder.CreateIntCast(Idx, Ty, false);
+ V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
}
}
@@ -566,7 +566,7 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
- VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
+ VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
}
@@ -588,10 +588,10 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
// Okay, we can do this evaluation. Start by converting the index to intptr.
if (VariableIdx->getType() != IntPtrTy)
- VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
+ VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy,
true /*Signed*/);
Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
- return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset");
+ return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset");
}
/// Returns true if we can rewrite Start as a GEP with pointer Base
@@ -981,13 +981,13 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
if (LHSIndexTy != RHSIndexTy) {
if (LHSIndexTy->getPrimitiveSizeInBits() <
RHSIndexTy->getPrimitiveSizeInBits()) {
- ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy);
+ ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
} else
- LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy);
+ LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
}
- Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond),
- LOffset, ROffset);
+ Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond),
+ LOffset, ROffset);
return replaceInstUsesWith(I, Cmp);
}
@@ -1026,7 +1026,7 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
if (NumDifferences == 0) // SAME GEP?
return replaceInstUsesWith(I, // No comparison is needed here.
- Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond)));
+ Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1 && GEPsInBounds) {
Value *LHSV = GEPLHS->getOperand(DiffOperand);
@@ -1174,7 +1174,7 @@ Instruction *InstCombiner::foldICmpAddOpConst(Instruction &ICI,
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
- Constant *C = Builder->getInt(CI->getValue()-1);
+ Constant *C = Builder.getInt(CI->getValue() - 1);
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
}
@@ -1347,17 +1347,17 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
Value *F = Intrinsic::getDeclaration(I.getModule(),
Intrinsic::sadd_with_overflow, NewType);
- InstCombiner::BuilderTy *Builder = IC.Builder;
+ InstCombiner::BuilderTy &Builder = IC.Builder;
// Put the new code above the original add, in case there are any uses of the
// add between the add and the compare.
- Builder->SetInsertPoint(OrigAdd);
+ Builder.SetInsertPoint(OrigAdd);
- Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName() + ".trunc");
- Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName() + ".trunc");
- CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd");
- Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
- Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
+ Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc");
+ Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc");
+ CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd");
+ Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result");
+ Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType());
// The inner add was the result of the narrow add, zero extended to the
// wider type. Replace it with the result computed by the intrinsic.
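
For context between hunks: this function recognizes a hand-rolled signed-overflow check on a narrow add and replaces it with the sadd.with.overflow intrinsic. A hedged sketch of the emission step, assuming the standard Intrinsic::getDeclaration API (helper name is mine):

  // Emit the i1 overflow flag for a narrow signed add of A and B.
  llvm::Value *emitSAddOverflow(llvm::IRBuilder<> &B, llvm::Module *M,
                                llvm::Value *A, llvm::Value *RHS,
                                llvm::Type *NarrowTy) {
    llvm::Function *F = llvm::Intrinsic::getDeclaration(
        M, llvm::Intrinsic::sadd_with_overflow, NarrowTy);
    llvm::Value *TA = B.CreateTrunc(A, NarrowTy, "a.trunc");
    llvm::Value *TB = B.CreateTrunc(RHS, NarrowTy, "b.trunc");
    llvm::CallInst *Call = B.CreateCall(F, {TA, TB}, "sadd");
    return B.CreateExtractValue(Call, 1, "sadd.overflow"); // the i1 flag
  }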
@@ -1434,9 +1434,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
ConstantRange Intersection = DominatingCR.intersectWith(CR);
ConstantRange Difference = DominatingCR.difference(CR);
if (Intersection.isEmptySet())
- return replaceInstUsesWith(Cmp, Builder->getFalse());
+ return replaceInstUsesWith(Cmp, Builder.getFalse());
if (Difference.isEmptySet())
- return replaceInstUsesWith(Cmp, Builder->getTrue());
+ return replaceInstUsesWith(Cmp, Builder.getTrue());
// If this is a normal comparison, it demands all bits. If it is a sign
// bit comparison, it only demands the sign bit.
@@ -1452,9 +1452,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
return nullptr;
if (auto *AI = Intersection.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI));
+ return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI));
if (auto *AD = Difference.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD));
+ return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD));
}
return nullptr;
@@ -1628,11 +1628,11 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
!Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
// Compute C2 << Y.
Value *NewShift =
- IsShl ? Builder->CreateLShr(And->getOperand(1), Shift->getOperand(1))
- : Builder->CreateShl(And->getOperand(1), Shift->getOperand(1));
+ IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1))
+ : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1));
// Compute X & (C2 << Y).
- Value *NewAnd = Builder->CreateAnd(Shift->getOperand(0), NewShift);
+ Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift);
Cmp.setOperand(0, NewAnd);
return &Cmp;
}
@@ -1670,7 +1670,7 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
unsigned WideScalarBits = WideType->getScalarSizeInBits();
Constant *ZextC1 = ConstantInt::get(WideType, C1->zext(WideScalarBits));
Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
- Value *NewAnd = Builder->CreateAnd(W, ZextC2, And->getName());
+ Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName());
return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
}
}
@@ -1704,12 +1704,12 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
} else {
if (UsesRemoved >= 3)
- NewOr = Builder->CreateOr(Builder->CreateShl(One, B, LShr->getName(),
- /*HasNUW=*/true),
- One, Or->getName());
+ NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
+ /*HasNUW=*/true),
+ One, Or->getName());
}
if (NewOr) {
- Value *NewAnd = Builder->CreateAnd(A, NewOr, And->getName());
+ Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
Cmp.setOperand(0, NewAnd);
return &Cmp;
}
@@ -1772,7 +1772,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
if (And->getType()->isVectorTy())
NTy = VectorType::get(NTy, And->getType()->getVectorNumElements());
- Value *Trunc = Builder->CreateTrunc(X, NTy);
+ Value *Trunc = Builder.CreateTrunc(X, NTy);
auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
: CmpInst::ICMP_SLT;
return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
@@ -1811,9 +1811,9 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
// Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
// -> and (icmp eq P, null), (icmp eq Q, null).
Value *CmpP =
- Builder->CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
+ Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
Value *CmpQ =
- Builder->CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
+ Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
auto LogicOpc = Pred == ICmpInst::Predicate::ICMP_EQ ? Instruction::And
: Instruction::Or;
return BinaryOperator::Create(LogicOpc, CmpP, CmpQ);
@@ -1993,7 +1993,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
Constant *Mask = ConstantInt::get(
ShType,
APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
- Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask");
+ Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
Constant *LShrC = ConstantInt::get(ShType, C->lshr(*ShiftAmt));
return new ICmpInst(Pred, And, LShrC);
}
@@ -2005,7 +2005,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
Constant *Mask = ConstantInt::get(
ShType,
APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
- Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask");
+ Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
And, Constant::getNullValue(ShType));
}
@@ -2024,7 +2024,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements());
Constant *NewC =
ConstantInt::get(TruncTy, C->ashr(*ShiftAmt).trunc(TypeBits - Amt));
- return new ICmpInst(Pred, Builder->CreateTrunc(X, TruncTy), NewC);
+ return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
}
return nullptr;
@@ -2076,8 +2076,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
Constant *DivCst = ConstantInt::get(
Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
- Value *Tmp = IsAShr ? Builder->CreateSDiv(X, DivCst, "", Shr->isExact())
- : Builder->CreateUDiv(X, DivCst, "", Shr->isExact());
+ Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact())
+ : Builder.CreateUDiv(X, DivCst, "", Shr->isExact());
Cmp.setOperand(0, Tmp);
@@ -2115,7 +2115,7 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
// Otherwise strength reduce the shift into an 'and'.
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
Constant *Mask = ConstantInt::get(Shr->getType(), Val);
- Value *And = Builder->CreateAnd(X, Mask, Shr->getName() + ".mask");
+ Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
return new ICmpInst(Pred, And, ShiftedCmpRHS);
}
@@ -2279,7 +2279,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
default: llvm_unreachable("Unhandled icmp opcode!");
case ICmpInst::ICMP_EQ:
if (LoOverflow && HiOverflow)
- return replaceInstUsesWith(Cmp, Builder->getFalse());
+ return replaceInstUsesWith(Cmp, Builder.getFalse());
if (HiOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
ICmpInst::ICMP_UGE, X, LoBound);
@@ -2291,7 +2291,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
HiBound->getUniqueInteger(), DivIsSigned, true));
case ICmpInst::ICMP_NE:
if (LoOverflow && HiOverflow)
- return replaceInstUsesWith(Cmp, Builder->getTrue());
+ return replaceInstUsesWith(Cmp, Builder.getTrue());
if (HiOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, LoBound);
@@ -2305,16 +2305,16 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT:
if (LoOverflow == +1) // Low bound is greater than input range.
- return replaceInstUsesWith(Cmp, Builder->getTrue());
+ return replaceInstUsesWith(Cmp, Builder.getTrue());
if (LoOverflow == -1) // Low bound is less than input range.
- return replaceInstUsesWith(Cmp, Builder->getFalse());
+ return replaceInstUsesWith(Cmp, Builder.getFalse());
return new ICmpInst(Pred, X, LoBound);
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT:
if (HiOverflow == +1) // High bound greater than input range.
- return replaceInstUsesWith(Cmp, Builder->getFalse());
+ return replaceInstUsesWith(Cmp, Builder.getFalse());
if (HiOverflow == -1) // High bound less than input range.
- return replaceInstUsesWith(Cmp, Builder->getTrue());
+ return replaceInstUsesWith(Cmp, Builder.getTrue());
if (Pred == ICmpInst::ICMP_UGT)
return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
@@ -2361,12 +2361,12 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
// iff (C2 & (C - 1)) == C - 1 and C is a power of 2
if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() &&
(*C2 & (*C - 1)) == (*C - 1))
- return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateOr(Y, *C - 1), X);
+ return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, *C - 1), X);
// C2 - Y >u C -> (Y | C) != C2
// iff C2 & C == C and C + 1 is a power of 2
if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == *C)
- return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateOr(Y, *C), X);
+ return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, *C), X);
return nullptr;
}
@@ -2422,14 +2422,14 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
// iff C & (C2-1) == 0
// C2 is a power of 2
if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == 0)
- return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateAnd(X, -(*C)),
+ return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -(*C)),
ConstantExpr::getNeg(cast<Constant>(Y)));
// X+C >u C2 -> (X & ~C2) != C
// iff C & C2 == 0
// C2+1 is a power of 2
if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == 0)
- return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateAnd(X, ~(*C)),
+ return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~(*C)),
ConstantExpr::getNeg(cast<Constant>(Y)));
return nullptr;
@@ -2493,13 +2493,13 @@ Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
// When none of the three constants satisfy the predicate for the RHS (C),
// the entire original Cmp can be simplified to a false.
- Value *Cond = Builder->getFalse();
+ Value *Cond = Builder.getFalse();
if (TrueWhenLessThan)
- Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
+ Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
if (TrueWhenEqual)
- Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
+ Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
if (TrueWhenGreaterThan)
- Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
+ Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
return replaceInstUsesWith(Cmp, Cond);
}
@@ -2615,7 +2615,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
if (C->isNullValue() && BO->hasOneUse()) {
const APInt *BOC;
if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
- Value *NewRem = Builder->CreateURem(BOp0, BOp1, BO->getName());
+ Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
return new ICmpInst(Pred, NewRem,
Constant::getNullValue(BO->getType()));
}
@@ -2637,7 +2637,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
if (Value *NegVal = dyn_castNegVal(BOp0))
return new ICmpInst(Pred, NegVal, BOp1);
if (BO->hasOneUse()) {
- Value *Neg = Builder->CreateNeg(BOp1);
+ Value *Neg = Builder.CreateNeg(BOp1);
Neg->takeName(BO);
return new ICmpInst(Pred, BOp0, Neg);
}
@@ -2676,7 +2676,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
// Replace (X | C) == -1 with (X & ~C) == ~C.
// This removes the -1 constant.
Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1));
- Value *And = Builder->CreateAnd(BOp0, NotBOC);
+ Value *And = Builder.CreateAnd(BOp0, NotBOC);
return new ICmpInst(Pred, And, NotBOC);
}
break;
@@ -2740,23 +2740,26 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
if (!II || !Cmp.isEquality())
return nullptr;
- // Handle icmp {eq|ne} <intrinsic>, intcst.
+ // Handle icmp {eq|ne} <intrinsic>, Constant.
+ Type *Ty = II->getType();
switch (II->getIntrinsicID()) {
case Intrinsic::bswap:
Worklist.Add(II);
Cmp.setOperand(0, II->getArgOperand(0));
- Cmp.setOperand(1, Builder->getInt(C->byteSwap()));
+ Cmp.setOperand(1, ConstantInt::get(Ty, C->byteSwap()));
return &Cmp;
+
case Intrinsic::ctlz:
case Intrinsic::cttz:
// ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
if (*C == C->getBitWidth()) {
Worklist.Add(II);
Cmp.setOperand(0, II->getArgOperand(0));
- Cmp.setOperand(1, ConstantInt::getNullValue(II->getType()));
+ Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
return &Cmp;
}
break;
+
case Intrinsic::ctpop: {
// popcount(A) == 0 -> A == 0 and likewise for !=
// popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
@@ -2764,8 +2767,8 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
if (IsZero || *C == C->getBitWidth()) {
Worklist.Add(II);
Cmp.setOperand(0, II->getArgOperand(0));
- auto *NewOp = IsZero ? Constant::getNullValue(II->getType())
- : Constant::getAllOnesValue(II->getType());
+ auto *NewOp =
+ IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty);
Cmp.setOperand(1, NewOp);
return &Cmp;
}
@@ -2774,6 +2777,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
default:
break;
}
+
return nullptr;
}
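
The ctlz/cttz case above, in isolation: the count can only reach the full bit width when the input is zero, so the intrinsic call vanishes from the compare. A trivial sketch of the replacement (hypothetical helper):

  // icmp eq (cttz X), bitwidth(X)  -->  icmp eq X, 0
  llvm::Value *foldCttzEqBitWidth(llvm::IRBuilder<> &B, llvm::Value *X) {
    return B.CreateICmpEQ(X, llvm::Constant::getNullValue(X->getType()));
  }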
@@ -2841,11 +2845,11 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
}
if (Transform) {
if (!Op1)
- Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
- I.getName());
+ Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
+ I.getName());
if (!Op2)
- Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
- I.getName());
+ Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
+ I.getName());
return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
}
break;
@@ -3029,12 +3033,12 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
APInt AP1Abs = C1->getValue().abs();
APInt AP2Abs = C2->getValue().abs();
if (AP1Abs.uge(AP2Abs)) {
- ConstantInt *C3 = Builder->getInt(AP1 - AP2);
- Value *NewAdd = Builder->CreateNSWAdd(A, C3);
+ ConstantInt *C3 = Builder.getInt(AP1 - AP2);
+ Value *NewAdd = Builder.CreateNSWAdd(A, C3);
return new ICmpInst(Pred, NewAdd, C);
} else {
- ConstantInt *C3 = Builder->getInt(AP2 - AP1);
- Value *NewAdd = Builder->CreateNSWAdd(C, C3);
+ ConstantInt *C3 = Builder.getInt(AP2 - AP1);
+ Value *NewAdd = Builder.CreateNSWAdd(C, C3);
return new ICmpInst(Pred, A, NewAdd);
}
}
@@ -3157,8 +3161,8 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
Constant *Mask = ConstantInt::get(
BO0->getType(),
APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
- Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
- Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
+ Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask);
+ Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
return new ICmpInst(Pred, And1, And2);
}
// If there are no trailing zeros in the multiplier, just eliminate
@@ -3315,8 +3319,8 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
ConstantInt *C1, *C2;
if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) &&
Op1->hasOneUse()) {
- Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue());
- Value *Xor = Builder->CreateXor(C, NC);
+ Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue());
+ Value *Xor = Builder.CreateXor(C, NC);
return new ICmpInst(Pred, A, Xor);
}
@@ -3362,8 +3366,8 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
}
if (X) { // Build (X^Y) & Z
- Op1 = Builder->CreateXor(X, Y);
- Op1 = Builder->CreateAnd(Op1, Z);
+ Op1 = Builder.CreateXor(X, Y);
+ Op1 = Builder.CreateAnd(Op1, Z);
I.setOperand(0, Op1);
I.setOperand(1, Constant::getNullValue(Op1->getType()));
return &I;
@@ -3380,7 +3384,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
APInt Pow2 = Cst1->getValue() + 1;
if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
- return new ICmpInst(Pred, A, Builder->CreateTrunc(B, A->getType()));
+ return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
}
// (A >> C) == (B >> C) --> (A^B) u< (1 << C)
@@ -3394,9 +3398,9 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
if (ShAmt < TypeBits && ShAmt != 0) {
ICmpInst::Predicate NewPred =
Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
- Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
- return new ICmpInst(NewPred, Xor, Builder->getInt(CmpVal));
+ return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal));
}
}
@@ -3406,9 +3410,9 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
unsigned TypeBits = Cst1->getBitWidth();
unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
if (ShAmt < TypeBits && ShAmt != 0) {
- Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
- Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
+ Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal),
I.getName() + ".mask");
return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
}
@@ -3433,11 +3437,20 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
APInt CmpV = Cst1->getValue().zext(ASize);
CmpV <<= ShAmt;
- Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
- return new ICmpInst(Pred, Mask, Builder->getInt(CmpV));
+ Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV));
+ return new ICmpInst(Pred, Mask, Builder.getInt(CmpV));
}
}
+ // If both operands are byte-swapped or bit-reversed, just compare the
+ // original values.
+ // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant()
+ // and handle more intrinsics.
+ if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) ||
+ (match(Op0, m_BitReverse(m_Value(A))) &&
+ match(Op1, m_BitReverse(m_Value(B)))))
+ return new ICmpInst(Pred, A, B);
+
return nullptr;
}
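
The new fold added at the end of this function rests on bswap and bitreverse being bijections: wrapping both sides in the same one preserves equality. A sketch of the same match outside InstCombine, valid for equality predicates only since byte-swapping does not preserve ordering (the helper is hypothetical):

  #include "llvm/IR/PatternMatch.h"
  using namespace llvm::PatternMatch;

  // icmp eq (bswap X), (bswap Y)  -->  icmp eq X, Y  (likewise for ne)
  llvm::Value *foldBswapCmp(llvm::IRBuilder<> &B,
                            llvm::CmpInst::Predicate Pred,
                            llvm::Value *Op0, llvm::Value *Op1) {
    llvm::Value *X, *Y;
    if (match(Op0, m_BSwap(m_Value(X))) && match(Op1, m_BSwap(m_Value(Y))))
      return B.CreateICmp(Pred, X, Y);
    return nullptr;
  }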
@@ -3462,7 +3475,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
RHSOp = RHSC->getOperand(0);
// If the pointer types don't match, insert a bitcast.
if (LHSCIOp->getType() != RHSOp->getType())
- RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType());
}
} else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
@@ -3546,7 +3559,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
// We're performing an unsigned comparison with a sign-extended value.
// This is true if the input is >= 0. [aka >s -1]
Constant *NegOne = Constant::getAllOnesValue(SrcTy);
- Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName());
+ Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName());
// Finally, return the value computed.
if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
@@ -3574,7 +3587,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
// may be pointing to the compare. We want to insert the new instructions
// before the add in case there are uses of the add between the add and the
// compare.
- Builder->SetInsertPoint(&OrigI);
+ Builder.SetInsertPoint(&OrigI);
switch (OCF) {
case OCF_INVALID:
@@ -3583,11 +3596,11 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
case OCF_UNSIGNED_ADD: {
OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
if (OR == OverflowResult::NeverOverflows)
- return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(),
+ return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
true);
if (OR == OverflowResult::AlwaysOverflows)
- return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true);
+ return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true);
// Fall through uadd into sadd
LLVM_FALLTHROUGH;
@@ -3595,13 +3608,13 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
case OCF_SIGNED_ADD: {
// X + 0 -> {X, false}
if (match(RHS, m_Zero()))
- return SetResult(LHS, Builder->getFalse(), false);
+ return SetResult(LHS, Builder.getFalse(), false);
// We can strength reduce this signed add into a regular add if we can prove
// that it will never overflow.
if (OCF == OCF_SIGNED_ADD)
if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
- return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
+ return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(),
true);
break;
}
@@ -3610,15 +3623,15 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
case OCF_SIGNED_SUB: {
// X - 0 -> {X, false}
if (match(RHS, m_Zero()))
- return SetResult(LHS, Builder->getFalse(), false);
+ return SetResult(LHS, Builder.getFalse(), false);
if (OCF == OCF_SIGNED_SUB) {
if (willNotOverflowSignedSub(LHS, RHS, OrigI))
- return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
+ return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(),
true);
} else {
if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
- return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
+ return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
true);
}
break;
@@ -3627,28 +3640,28 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
case OCF_UNSIGNED_MUL: {
OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
if (OR == OverflowResult::NeverOverflows)
- return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(),
+ return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
true);
if (OR == OverflowResult::AlwaysOverflows)
- return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true);
+ return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true);
LLVM_FALLTHROUGH;
}
case OCF_SIGNED_MUL:
// X * undef -> undef
if (isa<UndefValue>(RHS))
- return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false);
+ return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false);
// X * 0 -> {0, false}
if (match(RHS, m_Zero()))
- return SetResult(RHS, Builder->getFalse(), false);
+ return SetResult(RHS, Builder.getFalse(), false);
// X * 1 -> {X, false}
if (match(RHS, m_One()))
- return SetResult(LHS, Builder->getFalse(), false);
+ return SetResult(LHS, Builder.getFalse(), false);
if (OCF == OCF_SIGNED_MUL)
if (willNotOverflowSignedMul(LHS, RHS, OrigI))
- return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
+ return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(),
true);
break;
}
@@ -3813,25 +3826,25 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
return nullptr;
}
- InstCombiner::BuilderTy *Builder = IC.Builder;
- Builder->SetInsertPoint(MulInstr);
+ InstCombiner::BuilderTy &Builder = IC.Builder;
+ Builder.SetInsertPoint(MulInstr);
// Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
Value *MulA = A, *MulB = B;
if (WidthA < MulWidth)
- MulA = Builder->CreateZExt(A, MulType);
+ MulA = Builder.CreateZExt(A, MulType);
if (WidthB < MulWidth)
- MulB = Builder->CreateZExt(B, MulType);
+ MulB = Builder.CreateZExt(B, MulType);
Value *F = Intrinsic::getDeclaration(I.getModule(),
Intrinsic::umul_with_overflow, MulType);
- CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
+ CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
IC.Worklist.Add(MulInstr);
// If there are uses of the mul result other than the comparison, we
// know that they are truncation or binary AND. Change them to use the
// result of mul.with.overflow and adjust the mask/size accordingly.
if (MulVal->hasNUsesOrMore(2)) {
- Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value");
+ Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
for (User *U : MulVal->users()) {
if (U == &I || U == OtherVal)
continue;
@@ -3843,17 +3856,18 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
assert(BO->getOpcode() == Instruction::And);
// Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
- ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
- APInt ShortMask = CI->getValue().trunc(MulWidth);
- Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask);
- Instruction *Zext =
- cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType()));
- IC.Worklist.Add(Zext);
+ Value *ShortMask =
+ Builder.CreateTrunc(BO->getOperand(1), Builder.getIntNTy(MulWidth));
+ Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
+ Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType());
+ if (auto *ZextI = dyn_cast<Instruction>(Zext))
+ IC.Worklist.Add(ZextI);
IC.replaceInstUsesWith(*BO, Zext);
} else {
llvm_unreachable("Unexpected Binary operation");
}
- IC.Worklist.Add(cast<Instruction>(U));
+ if (auto *UI = dyn_cast<Instruction>(U))
+ IC.Worklist.Add(UI);
}
}
if (isa<Instruction>(OtherVal))
@@ -3884,7 +3898,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
llvm_unreachable("Unexpected predicate");
}
if (Inverse) {
- Value *Res = Builder->CreateExtractValue(Call, 1);
+ Value *Res = Builder.CreateExtractValue(Call, 1);
return BinaryOperator::CreateNot(Res);
}
@@ -4239,7 +4253,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Max == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- Builder->getInt(CI->getValue() - 1));
+ Builder.getInt(CI->getValue() - 1));
}
break;
case ICmpInst::ICMP_SGT:
@@ -4253,7 +4267,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Min == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- Builder->getInt(CI->getValue() + 1));
+ Builder.getInt(CI->getValue() + 1));
}
break;
case ICmpInst::ICMP_SGE:
@@ -4358,7 +4372,7 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
static Instruction *canonicalizeICmpBool(ICmpInst &I,
InstCombiner::BuilderTy &Builder) {
Value *A = I.getOperand(0), *B = I.getOperand(1);
- assert(A->getType()->getScalarType()->isIntegerTy(1) && "Bools only");
+ assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only");
// A boolean compared to true/false can be simplified to Op0/true/false in
// 14 out of the 20 (10 predicates * 2 constants) possible combinations.
@@ -4465,8 +4479,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
- if (Op0->getType()->getScalarType()->isIntegerTy(1))
- if (Instruction *Res = canonicalizeICmpBool(I, *Builder))
+ if (Op0->getType()->isIntOrIntVectorTy(1))
+ if (Instruction *Res = canonicalizeICmpBool(I, Builder))
return Res;
if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
@@ -4559,7 +4573,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
} else {
// Otherwise, cast the RHS right before the icmp
- Op1 = Builder->CreateBitCast(Op1, Op0->getType());
+ Op1 = Builder.CreateBitCast(Op1, Op0->getType());
}
}
return new ICmpInst(I.getPredicate(), Op0, Op1);
@@ -4592,8 +4606,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Zero()) &&
isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
- return new ICmpInst(I.getInversePredicate(),
- Builder->CreateAnd(A, B),
+ return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
Op1);
// ~X < ~Y --> Y < X
@@ -4693,10 +4706,10 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) {
if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
- return replaceInstUsesWith(I, Builder->getFalse());
+ return replaceInstUsesWith(I, Builder.getFalse());
assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
- return replaceInstUsesWith(I, Builder->getTrue());
+ return replaceInstUsesWith(I, Builder.getTrue());
}
}
@@ -4762,9 +4775,9 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
Pred = ICmpInst::ICMP_NE;
break;
case FCmpInst::FCMP_ORD:
- return replaceInstUsesWith(I, Builder->getTrue());
+ return replaceInstUsesWith(I, Builder.getTrue());
case FCmpInst::FCMP_UNO:
- return replaceInstUsesWith(I, Builder->getFalse());
+ return replaceInstUsesWith(I, Builder.getFalse());
}
// Now we know that the APFloat is a normal number, zero or inf.
@@ -4782,8 +4795,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
Pred == ICmpInst::ICMP_SLE)
- return replaceInstUsesWith(I, Builder->getTrue());
- return replaceInstUsesWith(I, Builder->getFalse());
+ return replaceInstUsesWith(I, Builder.getTrue());
+ return replaceInstUsesWith(I, Builder.getFalse());
}
} else {
// If the RHS value is > UnsignedMax, fold the comparison. This handles
@@ -4794,8 +4807,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
Pred == ICmpInst::ICMP_ULE)
- return replaceInstUsesWith(I, Builder->getTrue());
- return replaceInstUsesWith(I, Builder->getFalse());
+ return replaceInstUsesWith(I, Builder.getTrue());
+ return replaceInstUsesWith(I, Builder.getFalse());
}
}
@@ -4807,8 +4820,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
Pred == ICmpInst::ICMP_SGE)
- return replaceInstUsesWith(I, Builder->getTrue());
- return replaceInstUsesWith(I, Builder->getFalse());
+ return replaceInstUsesWith(I, Builder.getTrue());
+ return replaceInstUsesWith(I, Builder.getFalse());
}
} else {
// See if the RHS value is < UnsignedMin.
@@ -4818,8 +4831,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
Pred == ICmpInst::ICMP_UGE)
- return replaceInstUsesWith(I, Builder->getTrue());
- return replaceInstUsesWith(I, Builder->getFalse());
+ return replaceInstUsesWith(I, Builder.getTrue());
+ return replaceInstUsesWith(I, Builder.getFalse());
}
}
@@ -4841,14 +4854,14 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
switch (Pred) {
default: llvm_unreachable("Unexpected integer comparison!");
case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
- return replaceInstUsesWith(I, Builder->getTrue());
+ return replaceInstUsesWith(I, Builder.getTrue());
case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
- return replaceInstUsesWith(I, Builder->getFalse());
+ return replaceInstUsesWith(I, Builder.getFalse());
case ICmpInst::ICMP_ULE:
// (float)int <= 4.4 --> int <= 4
// (float)int <= -4.4 --> false
if (RHS.isNegative())
- return replaceInstUsesWith(I, Builder->getFalse());
+ return replaceInstUsesWith(I, Builder.getFalse());
break;
case ICmpInst::ICMP_SLE:
// (float)int <= 4.4 --> int <= 4
@@ -4860,7 +4873,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
// (float)int < -4.4 --> false
// (float)int < 4.4 --> int <= 4
if (RHS.isNegative())
- return replaceInstUsesWith(I, Builder->getFalse());
+ return replaceInstUsesWith(I, Builder.getFalse());
Pred = ICmpInst::ICMP_ULE;
break;
case ICmpInst::ICMP_SLT:
@@ -4873,7 +4886,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
// (float)int > 4.4 --> int > 4
// (float)int > -4.4 --> true
if (RHS.isNegative())
- return replaceInstUsesWith(I, Builder->getTrue());
+ return replaceInstUsesWith(I, Builder.getTrue());
break;
case ICmpInst::ICMP_SGT:
// (float)int > 4.4 --> int > 4
@@ -4885,7 +4898,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
// (float)int >= -4.4 --> true
// (float)int >= 4.4 --> int > 4
if (RHS.isNegative())
- return replaceInstUsesWith(I, Builder->getTrue());
+ return replaceInstUsesWith(I, Builder.getTrue());
Pred = ICmpInst::ICMP_UGT;
break;
case ICmpInst::ICMP_SGE:
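The fold touched above turns an fcmp of (sitofp X) against a constant into a pure integer compare once the constant is known not to be exactly representable. A minimal standalone check of the underlying identities, assuming 32-bit int so the int-to-double cast is exact (values are illustrative only):

    #include <cassert>

    int main() {
      // (double)x is exact for 32-bit x, so comparing against a
      // non-integral constant reduces to an integer compare against
      // the rounded bound.
      for (int x = -10; x <= 10; ++x) {
        assert(((double)x < 4.4) == (x <= 4));   // (float)int < 4.4 --> int <= 4
        assert(((double)x > -4.4) == (x >= -4)); // (float)int > -4.4 --> int >= -4
        assert(((double)x == 4.4) == false);     // no int converts to exactly 4.4
      }
    }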
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 87f11467b95e2..c38a4981bf1dc 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -21,8 +21,6 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -212,7 +210,7 @@ public:
/// \brief An IRBuilder that automatically inserts new instructions into the
/// worklist.
typedef IRBuilder<TargetFolder, IRBuilderCallbackInserter> BuilderTy;
- BuilderTy *Builder;
+ BuilderTy &Builder;
private:
// Mode in which we are running the combiner.
@@ -235,7 +233,7 @@ private:
bool MadeIRChange;
public:
- InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
+ InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
const DataLayout &DL, LoopInfo *LI)
@@ -598,9 +596,8 @@ private:
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
- Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &,
- Instruction::BinaryOps, Value *, Value *, Value *,
- Value *);
+ Value *tryFactorization(BinaryOperator &, Instruction::BinaryOps, Value *,
+ Value *, Value *, Value *);
/// Match a select chain which produces one of three values based on whether
/// the LHS is less than, equal to, or greater than RHS respectively.
@@ -639,7 +636,6 @@ private:
APInt &UndefElts, unsigned Depth = 0);
Value *SimplifyVectorOp(BinaryOperator &Inst);
- Value *SimplifyBSwap(BinaryOperator &Inst);
/// Given a binary operator, cast instruction, or select which has a PHI node
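The header change above is the heart of the patch: the Builder member becomes a reference, so it can no longer be null or reseated, and every use site switches from '->' to '.'. A self-contained sketch of the same refactoring pattern, with illustrative names that are not part of LLVM:

    #include <iostream>

    struct Builder { void emit(const char *S) { std::cout << S << '\n'; } };

    // Before: a pointer member; callers must trust it is non-null.
    struct CombinerOld {
      Builder *B;
      explicit CombinerOld(Builder *B) : B(B) {}
      void run() { B->emit("old"); }
    };

    // After: a reference member is bound once in the constructor and
    // documents that a builder is always present; '->' becomes '.'.
    struct CombinerNew {
      Builder &B;
      explicit CombinerNew(Builder &B) : B(B) {}
      void run() { B.emit("new"); }
    };

    int main() {
      Builder B;
      CombinerOld(&B).run();
      CombinerNew(B).run();
    }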
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 26bee204e5a44..c59e1ce69ac22 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -189,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
return nullptr;
// Canonicalize it.
- Value *V = IC.Builder->getInt32(1);
+ Value *V = IC.Builder.getInt32(1);
AI.setOperand(0, V);
return &AI;
}
@@ -197,7 +197,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
+ AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
New->setAlignment(AI.getAlignment());
// Scan to the end of the allocation instructions, to skip over a block of
@@ -229,7 +229,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
// any casting is exposed early.
Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
if (AI.getArraySize()->getType() != IntPtrTy) {
- Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+ Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
AI.setOperand(0, V);
return &AI;
}
@@ -458,10 +458,10 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
LI.getAllMetadata(MD);
- LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
- IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
+ LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
+ IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
- NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
MDBuilder MDB(NewLoad->getContext());
for (const auto &MDPair : MD) {
unsigned ID = MDPair.first;
@@ -518,10 +518,10 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
SI.getAllMetadata(MD);
- StoreInst *NewStore = IC.Builder->CreateAlignedStore(
- V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
+ StoreInst *NewStore = IC.Builder.CreateAlignedStore(
+ V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
SI.getAlignment(), SI.isVolatile());
- NewStore->setAtomic(SI.getOrdering(), SI.getSynchScope());
+ NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
for (const auto &MDPair : MD) {
unsigned ID = MDPair.first;
MDNode *N = MDPair.second;
@@ -613,7 +613,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
// Replace all the stores with stores of the newly loaded value.
for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
auto *SI = cast<StoreInst>(*UI++);
- IC.Builder->SetInsertPoint(SI);
+ IC.Builder.SetInsertPoint(SI);
combineStoreToNewValue(IC, *SI, NewLoad);
IC.eraseInstFromFunction(*SI);
}
@@ -664,7 +664,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
NewLoad->setAAMetadata(AAMD);
- return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -689,15 +689,15 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
Zero,
ConstantInt::get(IdxType, i),
};
- auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
- Name + ".elt");
+ auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
+ Name + ".elt");
auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
- auto *L = IC.Builder->CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
+ auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
// Propagate AA metadata. It'll still be valid on the narrowed load.
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
L->setAAMetadata(AAMD);
- V = IC.Builder->CreateInsertValue(V, L, i);
+ V = IC.Builder.CreateInsertValue(V, L, i);
}
V->setName(Name);
@@ -712,7 +712,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
NewLoad->setAAMetadata(AAMD);
- return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -740,14 +740,14 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
Zero,
ConstantInt::get(IdxType, i),
};
- auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
- Name + ".elt");
- auto *L = IC.Builder->CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
- Name + ".unpack");
+ auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
+ Name + ".elt");
+ auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
+ Name + ".unpack");
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
L->setAAMetadata(AAMD);
- V = IC.Builder->CreateInsertValue(V, L, i);
+ V = IC.Builder.CreateInsertValue(V, L, i);
Offset += EltSize;
}
@@ -982,8 +982,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
return replaceInstUsesWith(
- LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
- LI.getName() + ".cast"));
+ LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
+ LI.getName() + ".cast"));
}
// None of the following transforms are legal for volatile/ordered atomic
@@ -1019,15 +1019,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
unsigned Align = LI.getAlignment();
if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
- LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
- SI->getOperand(1)->getName()+".val");
- LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
- SI->getOperand(2)->getName()+".val");
+ LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
+ SI->getOperand(1)->getName()+".val");
+ LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
+ SI->getOperand(2)->getName()+".val");
assert(LI.isUnordered() && "implied by above");
V1->setAlignment(Align);
- V1->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
V2->setAlignment(Align);
- V2->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
return SelectInst::Create(SI->getCondition(), V1, V2);
}
@@ -1172,7 +1172,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
// If the struct only has one element, we unpack.
unsigned Count = ST->getNumElements();
if (Count == 1) {
- V = IC.Builder->CreateExtractValue(V, 0);
+ V = IC.Builder.CreateExtractValue(V, 0);
combineStoreToNewValue(IC, SI, V);
return true;
}
@@ -1201,12 +1201,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
Zero,
ConstantInt::get(IdxType, i),
};
- auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
- AddrName);
- auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+ auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
+ AddrName);
+ auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
- llvm::Instruction *NS =
- IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
AAMDNodes AAMD;
SI.getAAMetadata(AAMD);
NS->setAAMetadata(AAMD);
@@ -1219,7 +1218,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
// If the array only has one element, we unpack.
auto NumElements = AT->getNumElements();
if (NumElements == 1) {
- V = IC.Builder->CreateExtractValue(V, 0);
+ V = IC.Builder.CreateExtractValue(V, 0);
combineStoreToNewValue(IC, SI, V);
return true;
}
@@ -1252,11 +1251,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
Zero,
ConstantInt::get(IdxType, i),
};
- auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
- AddrName);
- auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+ auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
+ AddrName);
+ auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
auto EltAlign = MinAlign(Align, Offset);
- Instruction *NS = IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
AAMDNodes AAMD;
SI.getAAMetadata(AAMD);
NS->setAAMetadata(AAMD);
@@ -1541,7 +1540,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
SI.isVolatile(),
SI.getAlignment(),
SI.getOrdering(),
- SI.getSynchScope());
+ SI.getSyncScopeID());
InsertNewInstBefore(NewSI, *BBI);
// The debug locations of the original instructions might differ; merge them.
NewSI->setDebugLoc(DILocation::getMergedLocation(SI.getDebugLoc(),
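These hunks also pick up the accessor rename: getSynchScope() becomes getSyncScopeID(), returning a SyncScope::ID. The rewrite pattern for loads is unchanged; a sketch using only calls that appear in the diff (assumes LLVM headers and an IRBuilder positioned at the old load, so it is not a drop-in function):

    // Sketch: mirrors combineLoadToNewType() above.
    LoadInst *rewriteLoad(IRBuilder<> &Builder, LoadInst &LI, Type *NewTy) {
      unsigned AS = LI.getPointerAddressSpace();
      Value *Ptr = Builder.CreateBitCast(LI.getPointerOperand(),
                                         NewTy->getPointerTo(AS));
      LoadInst *NewLoad = Builder.CreateAlignedLoad(
          Ptr, LI.getAlignment(), LI.isVolatile(), LI.getName() + ".cast");
      // Ordering plus sync-scope ID together preserve the atomicity
      // of the original instruction.
      NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
      return NewLoad;
    }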
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 579639a6194e9..e3a50220f94e2 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -39,8 +39,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
Value *A = nullptr, *B = nullptr, *One = nullptr;
if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) &&
match(One, m_One())) {
- A = IC.Builder->CreateSub(A, B);
- return IC.Builder->CreateShl(One, A);
+ A = IC.Builder.CreateSub(A, B);
+ return IC.Builder.CreateShl(One, A);
}
// (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
@@ -250,9 +250,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
ConstantInt *C1;
Value *Sub = nullptr;
if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
- Sub = Builder->CreateSub(X, Y, "suba");
+ Sub = Builder.CreateSub(X, Y, "suba");
else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
- Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
+ Sub = Builder.CreateSub(Builder.CreateNeg(C1), Y, "subc");
if (Sub)
return
BinaryOperator::CreateMul(Sub,
@@ -272,11 +272,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
Value *X;
Constant *C1;
if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) {
- Value *Mul = Builder->CreateMul(C1, Op1);
+ Value *Mul = Builder.CreateMul(C1, Op1);
// Only go forward with the transform if C1*CI simplifies to a tidier
// constant.
if (!match(Mul, m_Mul(m_Value(), m_Value())))
- return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul);
+ return BinaryOperator::CreateAdd(Builder.CreateMul(X, Op1), Mul);
}
}
}
@@ -318,7 +318,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem
: Instruction::SRem;
- Value *Rem = Builder->CreateBinOp(RemOpc, X, DivOp1);
+ Value *Rem = Builder.CreateBinOp(RemOpc, X, DivOp1);
if (DivOp1 == Y)
return BinaryOperator::CreateSub(X, Rem);
return BinaryOperator::CreateSub(Rem, X);
@@ -326,7 +326,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
/// i1 mul -> i1 and.
- if (I.getType()->getScalarType()->isIntegerTy(1))
+ if (I.getType()->isIntOrIntVectorTy(1))
return BinaryOperator::CreateAnd(Op0, Op1);
// X*(1 << Y) --> X << Y
@@ -368,7 +368,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
if (BoolCast) {
- Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+ Value *V = Builder.CreateSub(Constant::getNullValue(I.getType()),
BoolCast);
return BinaryOperator::CreateAnd(V, OtherOp);
}
@@ -386,7 +386,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
// Insert the new, smaller mul.
Value *NewMul =
- Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
+ Builder.CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
return new SExtInst(NewMul, I.getType());
}
}
@@ -403,7 +403,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
willNotOverflowSignedMul(Op0Conv->getOperand(0),
Op1Conv->getOperand(0), I)) {
// Insert the new integer mul.
- Value *NewMul = Builder->CreateNSWMul(
+ Value *NewMul = Builder.CreateNSWMul(
Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
return new SExtInst(NewMul, I.getType());
}
@@ -422,7 +422,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
willNotOverflowUnsignedMul(Op0Conv->getOperand(0), CI, I)) {
// Insert the new, smaller mul.
Value *NewMul =
- Builder->CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
+ Builder.CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
return new ZExtInst(NewMul, I.getType());
}
}
@@ -439,7 +439,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
willNotOverflowUnsignedMul(Op0Conv->getOperand(0),
Op1Conv->getOperand(0), I)) {
// Insert the new integer mul.
- Value *NewMul = Builder->CreateNUWMul(
+ Value *NewMul = Builder.CreateNUWMul(
Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
return new ZExtInst(NewMul, I.getType());
}
@@ -698,11 +698,11 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
// If the pattern is detected, emit the alternate sequence.
if (OpX && OpY) {
- BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->setFastMathFlags(Log2->getFastMathFlags());
+ BuilderTy::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(Log2->getFastMathFlags());
Log2->setArgOperand(0, OpY);
- Value *FMulVal = Builder->CreateFMul(OpX, Log2);
- Value *FSub = Builder->CreateFSub(FMulVal, OpX);
+ Value *FMulVal = Builder.CreateFMul(OpX, Log2);
+ Value *FSub = Builder.CreateFSub(FMulVal, OpX);
FSub->takeName(&I);
return replaceInstUsesWith(I, FSub);
}
@@ -714,23 +714,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
for (int i = 0; i < 2; i++) {
bool IgnoreZeroSign = I.hasNoSignedZeros();
if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
- BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->setFastMathFlags(I.getFastMathFlags());
+ BuilderTy::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(I.getFastMathFlags());
Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
// -X * -Y => X*Y
if (N1) {
- Value *FMul = Builder->CreateFMul(N0, N1);
+ Value *FMul = Builder.CreateFMul(N0, N1);
FMul->takeName(&I);
return replaceInstUsesWith(I, FMul);
}
if (Opnd0->hasOneUse()) {
// -X * Y => -(X*Y) (Promote negation as high as possible)
- Value *T = Builder->CreateFMul(N0, Opnd1);
- Value *Neg = Builder->CreateFNeg(T);
+ Value *T = Builder.CreateFMul(N0, Opnd1);
+ Value *Neg = Builder.CreateFNeg(T);
Neg->takeName(&I);
return replaceInstUsesWith(I, Neg);
}
@@ -755,10 +755,10 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Y = Opnd0_0;
if (Y) {
- BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->setFastMathFlags(I.getFastMathFlags());
- Value *T = Builder->CreateFMul(Opnd1, Opnd1);
- Value *R = Builder->CreateFMul(T, Y);
+ BuilderTy::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(I.getFastMathFlags());
+ Value *T = Builder.CreateFMul(Opnd1, Opnd1);
+ Value *R = Builder.CreateFMul(T, Y);
R->takeName(&I);
return replaceInstUsesWith(I, R);
}
@@ -824,7 +824,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
*I = SI->getOperand(NonNullOperand);
Worklist.Add(&*BBI);
} else if (*I == SelectCond) {
- *I = Builder->getInt1(NonNullOperand == 1);
+ *I = Builder.getInt1(NonNullOperand == 1);
Worklist.Add(&*BBI);
}
}
@@ -938,20 +938,18 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
}
if (match(Op0, m_One())) {
- assert(!I.getType()->getScalarType()->isIntegerTy(1) &&
- "i1 divide not removed?");
+ assert(!I.getType()->isIntOrIntVectorTy(1) && "i1 divide not removed?");
if (I.getOpcode() == Instruction::SDiv) {
// If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the
// result is one, if Op1 is -1 then the result is minus one, otherwise
// it's zero.
- Value *Inc = Builder->CreateAdd(Op1, Op0);
- Value *Cmp = Builder->CreateICmpULT(
- Inc, ConstantInt::get(I.getType(), 3));
+ Value *Inc = Builder.CreateAdd(Op1, Op0);
+ Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(I.getType(), 3));
return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0));
} else {
// If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
// result is one, otherwise it's zero.
- return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType());
+ return new ZExtInst(Builder.CreateICmpEQ(Op1, Op0), I.getType());
}
}
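The 1 / X fold above is a branchless trick: for signed division, 1 / X is 1 when X == 1, -1 when X == -1, and 0 otherwise, which the select encodes as (unsigned)(X + 1) < 3 ? X : 0. A quick standalone check (X == 0 skipped, since division by zero is undefined and the fold may assume it away):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t x = -5; x <= 5; ++x) {
        if (x == 0) continue;
        int32_t folded = (uint32_t)(x + 1) < 3u ? x : 0;
        assert(folded == 1 / x);
      }
    }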
@@ -1026,7 +1024,7 @@ static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
// X udiv C, where C >= signbit
static Instruction *foldUDivNegCst(Value *Op0, Value *Op1,
const BinaryOperator &I, InstCombiner &IC) {
- Value *ICI = IC.Builder->CreateICmpULT(Op0, cast<ConstantInt>(Op1));
+ Value *ICI = IC.Builder.CreateICmpULT(Op0, cast<ConstantInt>(Op1));
return SelectInst::Create(ICI, Constant::getNullValue(I.getType()),
ConstantInt::get(I.getType(), 1));
@@ -1045,10 +1043,9 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
if (!match(ShiftLeft, m_Shl(m_APInt(CI), m_Value(N))))
llvm_unreachable("match should never fail here!");
if (*CI != 1)
- N = IC.Builder->CreateAdd(N,
- ConstantInt::get(N->getType(), CI->logBase2()));
+ N = IC.Builder.CreateAdd(N, ConstantInt::get(N->getType(), CI->logBase2()));
if (Op1 != ShiftLeft)
- N = IC.Builder->CreateZExt(N, Op1->getType());
+ N = IC.Builder.CreateZExt(N, Op1->getType());
BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
if (I.isExact())
LShr->setIsExact();
@@ -1134,7 +1131,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
return new ZExtInst(
- Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()),
+ Builder.CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()),
I.getType());
// (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
@@ -1209,7 +1206,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Constant *NarrowDivisor =
ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType());
- Value *NarrowOp = Builder->CreateSDiv(Op0Src, NarrowDivisor);
+ Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor);
return new SExtInst(NarrowOp, Op0->getType());
}
}
@@ -1217,7 +1214,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
if (Constant *RHS = dyn_cast<Constant>(Op1)) {
// X/INT_MIN -> X == INT_MIN
if (RHS->isMinSignedValue())
- return new ZExtInst(Builder->CreateICmpEQ(Op0, Op1), I.getType());
+ return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType());
// -X/C --> X/-C provided the negation doesn't overflow.
Value *X;
@@ -1380,7 +1377,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
// (X/Y) / Z => X / (Y*Z)
//
if (!isa<Constant>(Y) || !isa<Constant>(Op1)) {
- NewInst = Builder->CreateFMul(Y, Op1);
+ NewInst = Builder.CreateFMul(Y, Op1);
if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
FastMathFlags Flags = I.getFastMathFlags();
Flags &= cast<Instruction>(Op0)->getFastMathFlags();
@@ -1392,7 +1389,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
// Z / (X/Y) => Z*Y / X
//
if (!isa<Constant>(Y) || !isa<Constant>(Op0)) {
- NewInst = Builder->CreateFMul(Op0, Y);
+ NewInst = Builder.CreateFMul(Op0, Y);
if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
FastMathFlags Flags = I.getFastMathFlags();
Flags &= cast<Instruction>(Op1)->getFastMathFlags();
@@ -1483,28 +1480,28 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
// (zext A) urem (zext B) --> zext (A urem B)
if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
- return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+ return new ZExtInst(Builder.CreateURem(ZOp0->getOperand(0), ZOp1),
I.getType());
// X urem Y -> X and Y-1, where Y is a power of 2,
if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
- Value *Add = Builder->CreateAdd(Op1, N1);
+ Value *Add = Builder.CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
}
// 1 urem X -> zext(X != 1)
if (match(Op0, m_One())) {
- Value *Cmp = Builder->CreateICmpNE(Op1, Op0);
- Value *Ext = Builder->CreateZExt(Cmp, I.getType());
+ Value *Cmp = Builder.CreateICmpNE(Op1, Op0);
+ Value *Ext = Builder.CreateZExt(Cmp, I.getType());
return replaceInstUsesWith(I, Ext);
}
// X urem C -> X < C ? X : X - C, where C >= signbit.
const APInt *DivisorC;
if (match(Op1, m_APInt(DivisorC)) && DivisorC->isNegative()) {
- Value *Cmp = Builder->CreateICmpULT(Op0, Op1);
- Value *Sub = Builder->CreateSub(Op0, Op1);
+ Value *Cmp = Builder.CreateICmpULT(Op0, Op1);
+ Value *Sub = Builder.CreateSub(Op0, Op1);
return SelectInst::Create(Cmp, Op0, Sub);
}
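Two of the urem folds above are easy to sanity-check in scalar code: X urem Y becomes X & (Y-1) when Y is a power of two, and X urem C becomes X < C ? X : X - C when C has its sign bit set, because X < 2*C then always holds for same-width operands. Illustrative check:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Power-of-two divisor: the remainder is a mask.
      for (uint32_t x = 0; x < 1000; ++x)
        assert(x % 8u == (x & 7u));

      // Divisor with the sign bit set: at most one subtraction is needed.
      uint32_t c = 0x90000000u;
      for (uint64_t i = 0; i <= 0xFFFFFFFFull; i += 0x01000001ull) {
        uint32_t x = (uint32_t)i;
        assert(x % c == (x < c ? x : x - c));
      }
    }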
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 5dbf1e85b05b9..0011412c2bf47 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -636,10 +636,10 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
/// Return an existing non-zero constant if this phi node has one, otherwise
/// return constant 1.
static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) {
- assert(isa<IntegerType>(PN.getType()) && "Expect only intger type phi");
+ assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi");
for (Value *V : PN.operands())
if (auto *ConstVA = dyn_cast<ConstantInt>(V))
- if (!ConstVA->isZeroValue())
+ if (!ConstVA->isZero())
return ConstVA;
return ConstantInt::get(cast<IntegerType>(PN.getType()), 1);
}
@@ -836,12 +836,12 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
// Otherwise, do an extract in the predecessor.
- Builder->SetInsertPoint(Pred->getTerminator());
+ Builder.SetInsertPoint(Pred->getTerminator());
Value *Res = InVal;
if (Offset)
- Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(),
Offset), "extract");
- Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ Res = Builder.CreateTrunc(Res, Ty, "extract.t");
PredVal = Res;
EltPHI->addIncoming(Res, Pred);
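The per-predecessor extract built above is a shift followed by a truncate: move the wanted slice down to bit 0, then drop the high bits. The scalar equivalent, with illustrative widths:

    #include <cassert>
    #include <cstdint>

    // Mirrors Builder.CreateLShr(...) + Builder.CreateTrunc(...):
    // extract the 8-bit slice of V starting at bit Offset.
    static uint8_t extractSlice(uint64_t V, unsigned Offset) {
      return (uint8_t)(V >> Offset); // the truncate keeps the low 8 bits
    }

    int main() {
      uint64_t V = 0x1122334455667788ull;
      assert(extractSlice(V, 0) == 0x88);
      assert(extractSlice(V, 8) == 0x77);
      assert(extractSlice(V, 56) == 0x11);
    }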
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 80c6595904e11..4eebe8255998c 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -61,12 +61,12 @@ static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF,
}
}
-static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
+static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy &Builder,
SelectPatternFlavor SPF, Value *A,
Value *B) {
CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF);
assert(CmpInst::isIntPredicate(Pred));
- return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B);
+ return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
}
/// We want to turn code that looks like this:
@@ -167,8 +167,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
// Fold this by inserting a select from the input values.
Value *NewSI =
- Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
- FI->getOperand(0), SI.getName() + ".v", &SI);
+ Builder.CreateSelect(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName() + ".v", &SI);
return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
TI->getType());
}
@@ -211,8 +211,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
}
// If we reach here, they do have operations in common.
- Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
- SI.getName() + ".v", &SI);
+ Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
+ SI.getName() + ".v", &SI);
Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
return BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
@@ -227,8 +227,8 @@ static bool isSelect01(Constant *C1, Constant *C2) {
return false;
if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
return false;
- return C1I->isOne() || C1I->isAllOnesValue() ||
- C2I->isOne() || C2I->isAllOnesValue();
+ return C1I->isOne() || C1I->isMinusOne() ||
+ C2I->isOne() || C2I->isMinusOne();
}
/// Try to fold the select into one of the operands to allow further
@@ -254,7 +254,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
// Avoid creating select between 2 constants unless it's selecting
// between 0, 1 and -1.
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
- Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C);
+ Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C);
NewSel->takeName(TVI);
BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
@@ -284,7 +284,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
// Avoid creating select between 2 constants unless it's selecting
// between 0, 1 and -1.
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
- Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp);
+ Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp);
NewSel->takeName(FVI);
BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
@@ -315,7 +315,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// 3. The magnitude of C2 and C1 are flipped
static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
Value *FalseVal,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy &Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
if (!IC || !SI.getType()->isIntegerTy())
return nullptr;
@@ -383,22 +383,22 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
if (NeedAnd) {
// Insert the AND instruction on the input to the truncate.
APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log);
- V = Builder->CreateAnd(V, ConstantInt::get(V->getType(), C1));
+ V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1));
}
if (C2Log > C1Log) {
- V = Builder->CreateZExtOrTrunc(V, Y->getType());
- V = Builder->CreateShl(V, C2Log - C1Log);
+ V = Builder.CreateZExtOrTrunc(V, Y->getType());
+ V = Builder.CreateShl(V, C2Log - C1Log);
} else if (C1Log > C2Log) {
- V = Builder->CreateLShr(V, C1Log - C2Log);
- V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ V = Builder.CreateLShr(V, C1Log - C2Log);
+ V = Builder.CreateZExtOrTrunc(V, Y->getType());
} else
- V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ V = Builder.CreateZExtOrTrunc(V, Y->getType());
if (NeedXor)
- V = Builder->CreateXor(V, *C2);
+ V = Builder.CreateXor(V, *C2);
- return Builder->CreateOr(V, Y);
+ return Builder.CreateOr(V, Y);
}
/// Attempt to fold a cttz/ctlz followed by an icmp plus select into a single
@@ -414,7 +414,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
/// into:
/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy &Builder) {
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *CmpLHS = ICI->getOperand(0);
Value *CmpRHS = ICI->getOperand(1);
@@ -449,8 +449,8 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
Type *Ty = NewI->getArgOperand(1)->getType();
NewI->setArgOperand(1, Constant::getNullValue(Ty));
- Builder->Insert(NewI);
- return Builder->CreateZExtOrTrunc(NewI, ValueOnZero->getType());
+ Builder.Insert(NewI);
+ return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
}
return nullptr;
@@ -597,7 +597,7 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
ICmpInst *ICI) {
- if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *Builder))
+ if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder))
return NewSel;
bool Changed = adjustMinMax(SI, *ICI);
@@ -617,23 +617,23 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
if (TrueVal->getType() == Ty) {
if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
ConstantInt *C1 = nullptr, *C2 = nullptr;
- if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
+ if (Pred == ICmpInst::ICMP_SGT && Cmp->isMinusOne()) {
C1 = dyn_cast<ConstantInt>(TrueVal);
C2 = dyn_cast<ConstantInt>(FalseVal);
- } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) {
+ } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isZero()) {
C1 = dyn_cast<ConstantInt>(FalseVal);
C2 = dyn_cast<ConstantInt>(TrueVal);
}
if (C1 && C2) {
// This shift results in either -1 or 0.
- Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1);
+ Value *AShr = Builder.CreateAShr(CmpLHS, Ty->getBitWidth() - 1);
// Check if we can express the operation with a single or.
- if (C2->isAllOnesValue())
- return replaceInstUsesWith(SI, Builder->CreateOr(AShr, C1));
+ if (C2->isMinusOne())
+ return replaceInstUsesWith(SI, Builder.CreateOr(AShr, C1));
- Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue());
- return replaceInstUsesWith(SI, Builder->CreateAdd(And, C1));
+ Value *And = Builder.CreateAnd(AShr, C2->getValue() - C1->getValue());
+ return replaceInstUsesWith(SI, Builder.CreateAdd(And, C1));
}
}
}
@@ -684,19 +684,19 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
// (X & Y) == 0 ? X : X ^ Y --> X & ~Y
if (TrueWhenUnset && TrueVal == X &&
match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
- V = Builder->CreateAnd(X, ~(*Y));
+ V = Builder.CreateAnd(X, ~(*Y));
// (X & Y) != 0 ? X ^ Y : X --> X & ~Y
else if (!TrueWhenUnset && FalseVal == X &&
match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
- V = Builder->CreateAnd(X, ~(*Y));
+ V = Builder.CreateAnd(X, ~(*Y));
// (X & Y) == 0 ? X ^ Y : X --> X | Y
else if (TrueWhenUnset && FalseVal == X &&
match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
- V = Builder->CreateOr(X, *Y);
+ V = Builder.CreateOr(X, *Y);
// (X & Y) != 0 ? X : X ^ Y --> X | Y
else if (!TrueWhenUnset && TrueVal == X &&
match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
- V = Builder->CreateOr(X, *Y);
+ V = Builder.CreateOr(X, *Y);
if (V)
return replaceInstUsesWith(SI, V);
@@ -809,8 +809,8 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
(SPF1 == SPF_NABS && SPF2 == SPF_ABS)) {
SelectInst *SI = cast<SelectInst>(Inner);
Value *NewSI =
- Builder->CreateSelect(SI->getCondition(), SI->getFalseValue(),
- SI->getTrueValue(), SI->getName(), SI);
+ Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(),
+ SI->getTrueValue(), SI->getName(), SI);
return replaceInstUsesWith(Outer, NewSI);
}
@@ -848,15 +848,15 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
IsFreeOrProfitableToInvert(B, NotB, ElidesXor) &&
IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) {
if (!NotA)
- NotA = Builder->CreateNot(A);
+ NotA = Builder.CreateNot(A);
if (!NotB)
- NotB = Builder->CreateNot(B);
+ NotB = Builder.CreateNot(B);
if (!NotC)
- NotC = Builder->CreateNot(C);
+ NotC = Builder.CreateNot(C);
Value *NewInner = generateMinMaxSelectPattern(
Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB);
- Value *NewOuter = Builder->CreateNot(generateMinMaxSelectPattern(
+ Value *NewOuter = Builder.CreateNot(generateMinMaxSelectPattern(
Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC));
return replaceInstUsesWith(Outer, NewOuter);
}
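The double-negation fold above works because bitwise-not is an order-reversing bijection on two's-complement integers (~x == -x - 1), so max(a, b) == ~min(~a, ~b) and the inner xors cancel. Checking the identity directly:

    #include <algorithm>
    #include <cassert>

    int main() {
      for (int a = -4; a <= 4; ++a)
        for (int b = -4; b <= 4; ++b) {
          assert(~std::min(~a, ~b) == std::max(a, b));
          assert(~std::max(~a, ~b) == std::min(a, b));
        }
    }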
@@ -868,9 +868,9 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
/// icmp instruction with zero, and we have an 'and' with the non-constant value
/// and a power of two we can turn the select into a shift on the result of the
/// 'and'.
-static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
- ConstantInt *FalseVal,
- InstCombiner::BuilderTy *Builder) {
+static Value *foldSelectICmpAnd(const SelectInst &SI, APInt TrueVal,
+ APInt FalseVal,
+ InstCombiner::BuilderTy &Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
return nullptr;
@@ -886,56 +886,53 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
// If both select arms are non-zero see if we have a select of the form
// 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
// for 'x ? 2^n : 0' and fix the thing up at the end.
- ConstantInt *Offset = nullptr;
- if (!TrueVal->isZero() && !FalseVal->isZero()) {
- if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
+ APInt Offset(TrueVal.getBitWidth(), 0);
+ if (!TrueVal.isNullValue() && !FalseVal.isNullValue()) {
+ if ((TrueVal - FalseVal).isPowerOf2())
Offset = FalseVal;
- else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
+ else if ((FalseVal - TrueVal).isPowerOf2())
Offset = TrueVal;
else
return nullptr;
// Adjust TrueVal and FalseVal to the offset.
- TrueVal = ConstantInt::get(Builder->getContext(),
- TrueVal->getValue() - Offset->getValue());
- FalseVal = ConstantInt::get(Builder->getContext(),
- FalseVal->getValue() - Offset->getValue());
+ TrueVal -= Offset;
+ FalseVal -= Offset;
}
// Make sure the mask in the 'and' and one of the select arms is a power of 2.
if (!AndRHS->getValue().isPowerOf2() ||
- (!TrueVal->getValue().isPowerOf2() &&
- !FalseVal->getValue().isPowerOf2()))
+ (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()))
return nullptr;
// Determine which shift is needed to transform result of the 'and' into the
// desired result.
- ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
- unsigned ValZeros = ValC->getValue().logBase2();
+ const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal;
+ unsigned ValZeros = ValC.logBase2();
unsigned AndZeros = AndRHS->getValue().logBase2();
// If types don't match we can still convert the select by introducing a zext
// or a trunc of the 'and'. The trunc case requires that all of the truncated
// bits are zero, we can figure that out by looking at the 'and' mask.
- if (AndZeros >= ValC->getBitWidth())
+ if (AndZeros >= ValC.getBitWidth())
return nullptr;
- Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType());
+ Value *V = Builder.CreateZExtOrTrunc(LHS, SI.getType());
if (ValZeros > AndZeros)
- V = Builder->CreateShl(V, ValZeros - AndZeros);
+ V = Builder.CreateShl(V, ValZeros - AndZeros);
else if (ValZeros < AndZeros)
- V = Builder->CreateLShr(V, AndZeros - ValZeros);
+ V = Builder.CreateLShr(V, AndZeros - ValZeros);
// Okay, now we know that everything is set up, we just don't know whether we
// have an icmp_ne or icmp_eq and whether the true or false val is the zero.
- bool ShouldNotVal = !TrueVal->isZero();
+ bool ShouldNotVal = !TrueVal.isNullValue();
ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
if (ShouldNotVal)
- V = Builder->CreateXor(V, ValC);
+ V = Builder.CreateXor(V, ValC);
// Apply an offset if needed.
- if (Offset)
- V = Builder->CreateAdd(V, Offset);
+ if (!Offset.isNullValue())
+ V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset));
return V;
}
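The rewritten foldSelectICmpAnd carries the select arms as APInt now, but the transform is the same: when the condition tests a single bit (X & 2^m) and the arms are 2^v and 0, the select collapses to a shift of the masked value, with an optional xor and offset on top. The core case in scalar form:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 64; ++x) {
        // select((x & 4) != 0, 8, 0)  -->  (x & 4) << 1
        assert(((x & 4u) ? 8u : 0u) == ((x & 4u) << 1));
        // select((x & 8) != 0, 2, 0)  -->  (x & 8) >> 2
        assert(((x & 8u) ? 2u : 0u) == ((x & 8u) >> 2));
      }
    }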
@@ -1024,7 +1021,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) {
// TODO: Handle larger types? That requires adjusting FoldOpIntoSelect too.
Value *X = ExtInst->getOperand(0);
Type *SmallType = X->getType();
- if (!SmallType->getScalarType()->isIntegerTy(1))
+ if (!SmallType->isIntOrIntVectorTy(1))
return nullptr;
Constant *C;
@@ -1045,7 +1042,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) {
// select Cond, (ext X), C --> ext(select Cond, X, C')
// select Cond, C, (ext X) --> ext(select Cond, C', X)
- Value *NewSel = Builder->CreateSelect(Cond, X, TruncCVal, "narrow", &Sel);
+ Value *NewSel = Builder.CreateSelect(Cond, X, TruncCVal, "narrow", &Sel);
return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType);
}
@@ -1184,7 +1181,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (SelType->getScalarType()->isIntegerTy(1) &&
+ if (SelType->isIntOrIntVectorTy(1) &&
TrueVal->getType() == CondVal->getType()) {
if (match(TrueVal, m_One())) {
// Change: A = select B, true, C --> A = or B, C
@@ -1192,7 +1189,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
if (match(TrueVal, m_Zero())) {
// Change: A = select B, false, C --> A = and !B, C
- Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+ Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
return BinaryOperator::CreateAnd(NotCond, FalseVal);
}
if (match(FalseVal, m_Zero())) {
@@ -1201,7 +1198,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
if (match(FalseVal, m_One())) {
// Change: A = select B, C, true --> A = or !B, C
- Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+ Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
return BinaryOperator::CreateOr(NotCond, TrueVal);
}
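These four i1-select rewrites reduce selects to plain logic that later folds handle better. Their truth tables, with bool standing in for i1:

    #include <cassert>

    int main() {
      for (int bi = 0; bi < 2; ++bi)
        for (int ci = 0; ci < 2; ++ci) {
          bool b = bi, c = ci;
          assert((b ? true  : c) == (b || c));  // select B, true, C  --> or B, C
          assert((b ? false : c) == (!b && c)); // select B, false, C --> and !B, C
          assert((b ? c : false) == (b && c));  // select B, C, false --> and B, C
          assert((b ? c : true)  == (!b || c)); // select B, C, true  --> or !B, C
        }
    }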
@@ -1226,7 +1223,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0>
// because that may need 3 instructions to splat the condition value:
// extend, insertelement, shufflevector.
- if (CondVal->getType()->isVectorTy() == SelType->isVectorTy()) {
+ if (SelType->isIntOrIntVectorTy() &&
+ CondVal->getType()->isVectorTy() == SelType->isVectorTy()) {
// select C, 1, 0 -> zext C to int
if (match(TrueVal, m_One()) && match(FalseVal, m_Zero()))
return new ZExtInst(CondVal, SelType);
@@ -1237,20 +1235,21 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select C, 0, 1 -> zext !C to int
if (match(TrueVal, m_Zero()) && match(FalseVal, m_One())) {
- Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+ Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
return new ZExtInst(NotCond, SelType);
}
// select C, 0, -1 -> sext !C to int
if (match(TrueVal, m_Zero()) && match(FalseVal, m_AllOnes())) {
- Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+ Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
return new SExtInst(NotCond, SelType);
}
}
if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal))
- if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder))
+ if (Value *V = foldSelectICmpAnd(SI, TrueValC->getValue(),
+ FalseValC->getValue(), Builder))
return replaceInstUsesWith(SI, V);
// See if we are selecting two values based on a comparison of the two values.
@@ -1288,10 +1287,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// (X ugt Y) ? X : Y -> (X ole Y) ? Y : X
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
- IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
- Builder->setFastMathFlags(FCI->getFastMathFlags());
- Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal,
- FCI->getName() + ".inv");
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ Builder.setFastMathFlags(FCI->getFastMathFlags());
+ Value *NewCond = Builder.CreateFCmp(InvPred, TrueVal, FalseVal,
+ FCI->getName() + ".inv");
return SelectInst::Create(NewCond, FalseVal, TrueVal,
SI.getName() + ".p");
@@ -1331,10 +1330,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// (X ugt Y) ? X : Y -> (X ole Y) ? X : Y
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
- IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
- Builder->setFastMathFlags(FCI->getFastMathFlags());
- Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal,
- FCI->getName() + ".inv");
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ Builder.setFastMathFlags(FCI->getFastMathFlags());
+ Value *NewCond = Builder.CreateFCmp(InvPred, FalseVal, TrueVal,
+ FCI->getName() + ".inv");
return SelectInst::Create(NewCond, FalseVal, TrueVal,
SI.getName() + ".p");
@@ -1350,7 +1349,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *Result = foldSelectInstWithICmp(SI, ICI))
return Result;
- if (Instruction *Add = foldAddSubSelect(SI, *Builder))
+ if (Instruction *Add = foldAddSubSelect(SI, Builder))
return Add;
// Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
@@ -1381,16 +1380,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *Cmp;
if (CmpInst::isIntPredicate(Pred)) {
- Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+ Cmp = Builder.CreateICmp(Pred, LHS, RHS);
} else {
- IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
- Builder->setFastMathFlags(FMF);
- Cmp = Builder->CreateFCmp(Pred, LHS, RHS);
+ Builder.setFastMathFlags(FMF);
+ Cmp = Builder.CreateFCmp(Pred, LHS, RHS);
}
- Value *NewSI = Builder->CreateCast(
- CastOp, Builder->CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI),
+ Value *NewSI = Builder.CreateCast(
+ CastOp, Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI),
SelType);
return replaceInstUsesWith(SI, NewSI);
}
@@ -1425,13 +1424,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
(SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value())));
if (NumberOfNots >= 2) {
- Value *NewLHS = Builder->CreateNot(LHS);
- Value *NewRHS = Builder->CreateNot(RHS);
- Value *NewCmp = SPF == SPF_SMAX
- ? Builder->CreateICmpSLT(NewLHS, NewRHS)
- : Builder->CreateICmpULT(NewLHS, NewRHS);
+ Value *NewLHS = Builder.CreateNot(LHS);
+ Value *NewRHS = Builder.CreateNot(RHS);
+ Value *NewCmp = SPF == SPF_SMAX ? Builder.CreateICmpSLT(NewLHS, NewRHS)
+ : Builder.CreateICmpULT(NewLHS, NewRHS);
Value *NewSI =
- Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS));
+ Builder.CreateNot(Builder.CreateSelect(NewCmp, NewLHS, NewRHS));
return replaceInstUsesWith(SI, NewSI);
}
}
@@ -1461,7 +1459,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// We choose this as normal form to enable folding on the And and shortening
// paths for the values (this helps GetUnderlyingObjects() for example).
if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
- Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+ Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition());
SI.setOperand(0, And);
SI.setOperand(1, TrueSI->getTrueValue());
return &SI;
@@ -1479,7 +1477,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
- Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+ Value *Or = Builder.CreateOr(CondVal, FalseSI->getCondition());
SI.setOperand(0, Or);
SI.setOperand(2, FalseSI->getFalseValue());
return &SI;
@@ -1541,7 +1539,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return replaceInstUsesWith(SI, FalseVal);
}
- if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, *Builder))
+ if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, Builder))
return BitCastSel;
return nullptr;
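The two nested-select hunks near the end flatten selects that share an arm: select(C0, select(C1, a, b), b) becomes select(C0 & C1, a, b), and select(C0, a, select(C1, a, b)) becomes select(C0 | C1, a, b). Checked with ternaries:

    #include <cassert>

    int main() {
      int a = 10, b = 20;
      for (int c0 = 0; c0 < 2; ++c0)
        for (int c1 = 0; c1 < 2; ++c1) {
          assert((c0 ? (c1 ? a : b) : b) == ((c0 && c1) ? a : b));
          assert((c0 ? a : (c1 ? a : b)) == ((c0 || c1) ? a : b));
        }
    }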
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 1bb1a85367d1b..7ed141c7fd79d 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -47,7 +47,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) &&
isKnownNonNegative(C, DL, 0, &AC, &I, &DT))
return BinaryOperator::Create(
- I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A);
+ I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A);
// X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2.
// Because shifts by negative values (which could occur if A were negative)
@@ -56,8 +56,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
// FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
// demand the sign bit (and many others) here??
- Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1),
- Op1->getName());
+ Value *Rem = Builder.CreateAnd(A, ConstantInt::get(I.getType(), *B - 1),
+ Op1->getName());
I.setOperand(1, Rem);
return &I;
}
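The srem fold above is sound because a negative or oversized shift amount is already undefined, so only non-negative A matters, and for those A srem B equals A & (B-1) when B is a power of two. Quick check of that case:

    #include <cassert>

    int main() {
      for (int a = 0; a < 256; ++a)
        assert(a % 16 == (a & 15)); // srem by 2^k == mask by 2^k - 1 for a >= 0
    }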
@@ -260,9 +260,9 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// We can always evaluate constants shifted.
if (Constant *C = dyn_cast<Constant>(V)) {
if (isLeftShift)
- V = IC.Builder->CreateShl(C, NumBits);
+ V = IC.Builder.CreateShl(C, NumBits);
else
- V = IC.Builder->CreateLShr(C, NumBits);
+ V = IC.Builder.CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (auto *C = dyn_cast<Constant>(V))
if (auto *FoldedC =
@@ -289,7 +289,7 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Shl:
case Instruction::LShr:
return foldShiftedShift(cast<BinaryOperator>(I), NumBits, isLeftShift,
- *(IC.Builder));
+ IC.Builder);
case Instruction::Select:
I->setOperand(
@@ -353,7 +353,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
Constant *ShAmt =
ConstantExpr::getZExt(cast<Constant>(Op1), TrOp->getType());
// (shift2 (shift1 & 0x00FF), c2)
- Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
+ Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName());
// For logical shifts, the truncation has the effect of making the high
// part of the register be zeros. Emulate this by inserting an AND to
@@ -375,9 +375,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
}
// shift1 & 0x00FF
- Value *And = Builder->CreateAnd(NSh,
- ConstantInt::get(I.getContext(), MaskV),
- TI->getName());
+ Value *And = Builder.CreateAnd(NSh,
+ ConstantInt::get(I.getContext(), MaskV),
+ TI->getName());
// Return the value truncated to the interesting size.
return new TruncInst(And, I.getType());
@@ -401,10 +401,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
m_Specific(Op1)))) {
Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
// (X + (Y << C))
- Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
- Op0BO->getOperand(1)->getName());
+ Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
@@ -421,11 +421,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
m_ConstantInt(CC)))) {
Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(0), Op1,
- Op0BO->getName());
+ Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
// X & (CC << C)
- Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
- V1->getName()+".mask");
+ Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
}
LLVM_FALLTHROUGH;
@@ -437,10 +436,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
m_Specific(Op1)))) {
Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
// (X + (Y << C))
- Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
- Op0BO->getOperand(0)->getName());
+ Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
@@ -456,10 +455,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
m_ConstantInt(CC))) && V2 == Op1) {
Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
// X & (CC << C)
- Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
- V1->getName()+".mask");
+ Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
}
@@ -502,7 +501,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
Value *NewShift =
- Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
NewShift->takeName(Op0BO);
return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
@@ -541,7 +540,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
unsigned SrcWidth = X->getType()->getScalarSizeInBits();
if (ShAmt < SrcWidth &&
MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I))
- return new ZExtInst(Builder->CreateShl(X, ShAmt), Ty);
+ return new ZExtInst(Builder.CreateShl(X, ShAmt), Ty);
}
// (X >>u C) << C --> X & (-1 << C)
@@ -641,7 +640,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
// ctpop.i32(x)>>5 --> zext(x == -1)
bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop;
Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0);
- Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
+ Value *Cmp = Builder.CreateICmpEQ(II->getArgOperand(0), RHS);
return new ZExtInst(Cmp, Ty);
}
@@ -658,7 +657,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
return NewLShr;
}
// (X << C1) >>u C2 --> (X >>u (C2 - C1)) & (-1 >> C2)
- Value *NewLShr = Builder->CreateLShr(X, ShiftDiff, "", I.isExact());
+ Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
}
@@ -671,7 +670,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
return NewShl;
}
// (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2)
- Value *NewShl = Builder->CreateShl(X, ShiftDiff);
+ Value *NewShl = Builder.CreateShl(X, ShiftDiff);
APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
}
@@ -692,7 +691,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
// lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
if (Op0->hasOneUse()) {
- Value *NewLShr = Builder->CreateLShr(X, SrcTyBitWidth - 1);
+ Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1);
return new ZExtInst(NewLShr, Ty);
}
}
@@ -701,7 +700,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) {
// The new shift amount can't be more than the narrow source type.
unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1);
- Value *AShr = Builder->CreateAShr(X, NewShAmt);
+ Value *AShr = Builder.CreateAShr(X, NewShAmt);
return new ZExtInst(AShr, Ty);
}
}
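Two of the lshr folds in this file check out directly in scalar code: shifting a 32-bit population count right by 5 tests for all-ones, and (X << C1) >>u C2 with C2 > C1 is a smaller right shift plus a mask. Illustrative check:

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    int main() {
      // ctpop(x) >> 5 is 1 only when all 32 bits are set.
      uint32_t vals[] = {0u, 1u, 0x0FFFFFFFu, 0xFFFFFFFFu};
      for (uint32_t x : vals)
        assert((std::bitset<32>(x).count() >> 5) ==
               (x == 0xFFFFFFFFu ? 1u : 0u));

      // (x << 3) >> 7  ==  (x >> 4) & (0xFFFFFFFF >> 7)
      for (uint32_t x = 0; x < 100000u; x += 97u)
        assert(((x << 3) >> 7) == ((x >> 4) & (0xFFFFFFFFu >> 7)));
    }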
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 03841164b58de..5689c06042391 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -548,7 +548,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
// X % -1 demands all the bits because we don't want to introduce
// INT_MIN % -1 (== undef) by accident.
- if (Rem->isAllOnesValue())
+ if (Rem->isMinusOne())
break;
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
@@ -1627,10 +1627,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
Args.push_back(II->getArgOperand(I));
- IRBuilderBase::InsertPointGuard Guard(*Builder);
- Builder->SetInsertPoint(II);
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(II);
- CallInst *NewCall = Builder->CreateCall(NewIntrin, Args);
+ CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
NewCall->takeName(II);
NewCall->copyMetadata(*II);
@@ -1657,15 +1657,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (NewNumElts == 1) {
- return Builder->CreateInsertElement(UndefValue::get(V->getType()),
- NewCall, static_cast<uint64_t>(0));
+ return Builder.CreateInsertElement(UndefValue::get(V->getType()),
+ NewCall, static_cast<uint64_t>(0));
}
SmallVector<uint32_t, 8> EltMask;
for (unsigned I = 0; I < VWidth; ++I)
EltMask.push_back(I);
- Value *Shuffle = Builder->CreateShuffleVector(
+ Value *Shuffle = Builder.CreateShuffleVector(
NewCall, UndefValue::get(NewTy), EltMask);
MadeChange = true;
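An aside on the srem bail-out above: X % -1 is mathematically zero for every X, but the demanded-bits walk must not narrow the operands into materializing INT_MIN % -1, whose quotient overflows. A plain C++ illustration, separate from the LLVM code:

#include <cassert>
#include <cstdint>

// X % -1 is 0 for every representable X, but evaluating INT32_MIN % -1 in
// C++ is undefined behavior because the intermediate quotient
// INT32_MIN / -1 does not fit in 32 bits. SimplifyDemandedUseBits
// therefore treats a -1 divisor as demanding all bits.
bool sremByMinusOneIsWellDefined(int32_t X) { return X != INT32_MIN; }

int main() {
  assert(sremByMinusOneIsWellDefined(42));
  assert(!sremByMinusOneIsWellDefined(INT32_MIN));
  return 0;
}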
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 926e46655eb86..dd71a31b644b3 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -204,11 +204,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (I->hasOneUse() &&
cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
Value *newEI0 =
- Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
- EI.getName()+".lhs");
+ Builder.CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
Value *newEI1 =
- Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
- EI.getName()+".rhs");
+ Builder.CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(),
newEI0, newEI1, BO);
}
@@ -250,8 +250,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// Bitcasts can change the number of vector elements, and they cost
// nothing.
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
- Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
- EI.getIndexOperand());
+ Value *EE = Builder.CreateExtractElement(CI->getOperand(0),
+ EI.getIndexOperand());
Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
@@ -269,20 +269,20 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Value *Cond = SI->getCondition();
if (Cond->getType()->isVectorTy()) {
- Cond = Builder->CreateExtractElement(Cond,
- EI.getIndexOperand(),
- Cond->getName() + ".elt");
+ Cond = Builder.CreateExtractElement(Cond,
+ EI.getIndexOperand(),
+ Cond->getName() + ".elt");
}
Value *V1Elem
- = Builder->CreateExtractElement(TrueVal,
- EI.getIndexOperand(),
- TrueVal->getName() + ".elt");
+ = Builder.CreateExtractElement(TrueVal,
+ EI.getIndexOperand(),
+ TrueVal->getName() + ".elt");
Value *V2Elem
- = Builder->CreateExtractElement(FalseVal,
- EI.getIndexOperand(),
- FalseVal->getName() + ".elt");
+ = Builder.CreateExtractElement(FalseVal,
+ EI.getIndexOperand(),
+ FalseVal->getName() + ".elt");
return SelectInst::Create(Cond,
V1Elem,
V2Elem,
@@ -837,7 +837,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
return Shuf;
- if (Instruction *NewInsElt = hoistInsEltConst(IE, *Builder))
+ if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
return NewInsElt;
// Turn a sequence of inserts that broadcasts a scalar into a single
@@ -1020,9 +1020,9 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
SmallVector<Constant *, 16> MaskValues;
for (int i = 0, e = Mask.size(); i != e; ++i) {
if (Mask[i] == -1)
- MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
+ MaskValues.push_back(UndefValue::get(Builder.getInt32Ty()));
else
- MaskValues.push_back(Builder->getInt32(Mask[i]));
+ MaskValues.push_back(Builder.getInt32(Mask[i]));
}
return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
ConstantVector::get(MaskValues));
@@ -1095,7 +1095,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
return InsertElementInst::Create(V, I->getOperand(1),
- Builder->getInt32(Index), "", I);
+ Builder.getInt32(Index), "", I);
}
}
llvm_unreachable("failed to reorder elements of vector instruction!");
@@ -1275,9 +1275,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
UndefValue::get(Int32Ty));
for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
- V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
- ConstantVector::get(ShuffleMask),
- SVI.getName() + ".extract");
+ V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(ShuffleMask),
+ SVI.getName() + ".extract");
BegIdx = 0;
}
unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
@@ -1287,10 +1287,10 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
auto *NewBC =
BCAlreadyExists
? NewBCs[CastSrcTy]
- : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
+ : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
if (!BCAlreadyExists)
NewBCs[CastSrcTy] = NewBC;
- auto *Ext = Builder->CreateExtractElement(
+ auto *Ext = Builder.CreateExtractElement(
NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
// The shufflevector isn't being replaced: the bitcast that used it
// is. InstCombine will visit the newly-created instructions.
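For reference, a sketch of the scalarization visitExtractElementInst applies above; the free-standing signature and names are assumptions for illustration, and the real fold additionally copies IR flags from the original operator:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// extractelement (binop V0, V1), Idx
//   --> binop (extractelement V0, Idx), (extractelement V1, Idx)
// Profitable when the vector op has one use and scalarizing is cheap.
static Value *scalarizeExtractOfBinOp(IRBuilder<> &Builder,
                                      BinaryOperator *BO, Value *Idx) {
  Value *LHS = Builder.CreateExtractElement(BO->getOperand(0), Idx,
                                            BO->getName() + ".lhs");
  Value *RHS = Builder.CreateExtractElement(BO->getOperand(1), Idx,
                                            BO->getName() + ".rhs");
  return Builder.CreateBinOp(BO->getOpcode(), LHS, RHS);
}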
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 723414635d6fb..90e2323991555 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -88,7 +88,7 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024),
cl::desc("Maximum array size considered when doing a combine"));
Value *InstCombiner::EmitGEPOffset(User *GEP) {
- return llvm::EmitGEPOffset(Builder, DL, GEP);
+ return llvm::EmitGEPOffset(&Builder, DL, GEP);
}
/// Return true if it is desirable to convert an integer computation from a
@@ -498,8 +498,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
/// This tries to simplify binary operations by factorizing out common terms
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
-Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
- BinaryOperator &I,
+Value *InstCombiner::tryFactorization(BinaryOperator &I,
Instruction::BinaryOps InnerOpcode,
Value *A, Value *B, Value *C, Value *D) {
assert(A && B && C && D && "All values must be provided");
@@ -525,9 +524,9 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
// If "B op D" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
if (!V && LHS->hasOneUse() && RHS->hasOneUse())
- V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
+ V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
if (V) {
- SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V);
+ SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V);
}
}
@@ -545,9 +544,9 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
// If "A op C" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
if (!V && LHS->hasOneUse() && RHS->hasOneUse())
- V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
+ V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
if (V) {
- SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B);
+ SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B);
}
}
@@ -610,7 +609,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
// The instruction has the form "(A op' B) op (C op' D)". Try to factorize
// a common term.
if (Op0 && Op1 && LHSOpcode == RHSOpcode)
- if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D))
+ if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D))
return V;
// The instruction has the form "(A op' B) op (C)". Try to factorize common
@@ -618,7 +617,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
if (Op0)
if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
if (Value *V =
- tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident))
+ tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
return V;
// The instruction has the form "(B) op (C op' D)". Try to factorize common
@@ -626,7 +625,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
if (Op1)
if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
if (Value *V =
- tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D))
+ tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
return V;
}
@@ -644,7 +643,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I))) {
// They do! Return "L op' R".
++NumExpand;
- C = Builder->CreateBinOp(InnerOpcode, L, R);
+ C = Builder.CreateBinOp(InnerOpcode, L, R);
C->takeName(&I);
return C;
}
@@ -663,7 +662,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) {
// They do! Return "L op' R".
++NumExpand;
- A = Builder->CreateBinOp(InnerOpcode, L, R);
+ A = Builder.CreateBinOp(InnerOpcode, L, R);
A->takeName(&I);
return A;
}
@@ -678,18 +677,18 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
if (Value *V =
SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
SI1->getFalseValue(), SQ.getWithInstruction(&I)))
- SI = Builder->CreateSelect(SI0->getCondition(),
- Builder->CreateBinOp(TopLevelOpcode,
- SI0->getTrueValue(),
- SI1->getTrueValue()),
- V);
+ SI = Builder.CreateSelect(SI0->getCondition(),
+ Builder.CreateBinOp(TopLevelOpcode,
+ SI0->getTrueValue(),
+ SI1->getTrueValue()),
+ V);
if (Value *V =
SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
SI1->getTrueValue(), SQ.getWithInstruction(&I)))
- SI = Builder->CreateSelect(
+ SI = Builder.CreateSelect(
SI0->getCondition(), V,
- Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
- SI1->getFalseValue()));
+ Builder.CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
+ SI1->getFalseValue()));
if (SI) {
SI->takeName(&I);
return SI;
@@ -751,9 +750,9 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
}
static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
- InstCombiner *IC) {
+ InstCombiner::BuilderTy &Builder) {
if (auto *Cast = dyn_cast<CastInst>(&I))
- return IC->Builder->CreateCast(Cast->getOpcode(), SO, I.getType());
+ return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());
assert(I.isBinaryOp() && "Unexpected opcode for select folding");
@@ -772,8 +771,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
std::swap(Op0, Op1);
auto *BO = cast<BinaryOperator>(&I);
- Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
- SO->getName() + ".op");
+ Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1,
+ SO->getName() + ".op");
auto *FPInst = dyn_cast<Instruction>(RI);
if (FPInst && isa<FPMathOperator>(FPInst))
FPInst->copyFastMathFlags(BO);
@@ -791,7 +790,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
// Bool selects with constant operands can be folded to logical ops.
- if (SI->getType()->getScalarType()->isIntegerTy(1))
+ if (SI->getType()->isIntOrIntVectorTy(1))
return nullptr;
// If it's a bitcast involving vectors, make sure it has the same number of
@@ -825,13 +824,13 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
}
}
- Value *NewTV = foldOperationIntoSelectOperand(Op, TV, this);
- Value *NewFV = foldOperationIntoSelectOperand(Op, FV, this);
+ Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
+ Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
}
static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
- InstCombiner *IC) {
+ InstCombiner::BuilderTy &Builder) {
bool ConstIsRHS = isa<Constant>(I->getOperand(1));
Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));
@@ -845,7 +844,7 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
if (!ConstIsRHS)
std::swap(Op0, Op1);
- Value *RI = IC->Builder->CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp");
+ Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp");
auto *FPInst = dyn_cast<Instruction>(RI);
if (FPInst && isa<FPMathOperator>(FPInst))
FPInst->copyFastMathFlags(I);
@@ -916,7 +915,7 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// If we are going to have to insert a new computation, do so right before the
// predecessor's terminator.
if (NonConstBB)
- Builder->SetInsertPoint(NonConstBB->getTerminator());
+ Builder.SetInsertPoint(NonConstBB->getTerminator());
// Next, add all of the operands to the PHI.
if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
@@ -948,9 +947,9 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
// non-vector phis, this transformation was always profitable because
// the select would be generated exactly once in the NonConstBB.
- Builder->SetInsertPoint(ThisBB->getTerminator());
- InV = Builder->CreateSelect(PN->getIncomingValue(i),
- TrueVInPred, FalseVInPred, "phitmp");
+ Builder.SetInsertPoint(ThisBB->getTerminator());
+ InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred,
+ FalseVInPred, "phitmp");
}
NewPN->addIncoming(InV, ThisBB);
}
@@ -961,16 +960,17 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
else if (isa<ICmpInst>(CI))
- InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
- C, "phitmp");
+ InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
else
- InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
- C, "phitmp");
+ InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
} else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
for (unsigned i = 0; i != NumPHIValues; ++i) {
- Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), this);
+ Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i),
+ Builder);
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
} else {
@@ -981,8 +981,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
else
- InV = Builder->CreateCast(CI->getOpcode(),
- PN->getIncomingValue(i), I.getType(), "phitmp");
+ InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i),
+ I.getType(), "phitmp");
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
}
@@ -1328,8 +1328,8 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
/// \brief Creates node of binary operation with the same attributes as the
/// specified one but with other operands.
static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
- InstCombiner::BuilderTy *B) {
- Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
+ InstCombiner::BuilderTy &B) {
+ Value *BO = B.CreateBinOp(Inst.getOpcode(), LHS, RHS);
// If LHS and RHS are constant, BO won't be a binary operator.
if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BO))
NewBO->copyIRFlags(&Inst);
@@ -1365,7 +1365,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType()) {
Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
RShuf->getOperand(0), Builder);
- return Builder->CreateShuffleVector(
+ return Builder.CreateShuffleVector(
NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask());
}
@@ -1404,7 +1404,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
Value *NewLHS = isa<Constant>(LHS) ? C2 : Shuffle->getOperand(0);
Value *NewRHS = isa<Constant>(LHS) ? Shuffle->getOperand(0) : C2;
Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
- return Builder->CreateShuffleVector(NewBO,
+ return Builder.CreateShuffleVector(NewBO,
UndefValue::get(Inst.getType()), Shuffle->getMask());
}
}
@@ -1452,7 +1452,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If we are using a wider index than needed for this platform, shrink
// it to what we need. If narrower, sign-extend it to what we need.
// This explicit cast can make subsequent optimizations more obvious.
- *I = Builder->CreateIntCast(*I, NewIndexType, true);
+ *I = Builder.CreateIntCast(*I, NewIndexType, true);
MadeChange = true;
}
}
@@ -1546,10 +1546,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// set that index.
PHINode *NewPN;
{
- IRBuilderBase::InsertPointGuard Guard(*Builder);
- Builder->SetInsertPoint(PN);
- NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(),
- PN->getNumOperands());
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(PN);
+ NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
+ PN->getNumOperands());
}
for (auto &I : PN->operands())
@@ -1669,8 +1669,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// pointer arithmetic.
if (match(V, m_Neg(m_PtrToInt(m_Value())))) {
Operator *Index = cast<Operator>(V);
- Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
- Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
+ Value *PtrToInt = Builder.CreatePtrToInt(PtrOp, Index->getType());
+ Value *NewSub = Builder.CreateSub(PtrToInt, Index->getOperand(1));
return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
}
// Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X))
@@ -1723,7 +1723,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// ->
// %0 = GEP i8 addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
- return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType());
+ return new AddrSpaceCastInst(Builder.Insert(Res), GEP.getType());
}
if (ArrayType *XATy =
@@ -1751,10 +1751,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
Value *NewGEP = GEP.isInBounds()
- ? Builder->CreateInBoundsGEP(
+ ? Builder.CreateInBoundsGEP(
nullptr, StrippedPtr, Idx, GEP.getName())
- : Builder->CreateGEP(nullptr, StrippedPtr, Idx,
- GEP.getName());
+ : Builder.CreateGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName());
return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
@@ -1772,9 +1772,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
Value *NewGEP =
GEP.isInBounds()
- ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
- GEP.getName())
- : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
+ ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName())
+ : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1807,10 +1807,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// GEP may not be "inbounds".
Value *NewGEP =
GEP.isInBounds() && NSW
- ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
- GEP.getName())
- : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx,
- GEP.getName());
+ ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName())
+ : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1849,10 +1849,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
NewIdx};
Value *NewGEP = GEP.isInBounds() && NSW
- ? Builder->CreateInBoundsGEP(
+ ? Builder.CreateInBoundsGEP(
SrcElTy, StrippedPtr, Off, GEP.getName())
- : Builder->CreateGEP(SrcElTy, StrippedPtr, Off,
- GEP.getName());
+ : Builder.CreateGEP(SrcElTy, StrippedPtr, Off,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEP.getType());
@@ -1916,8 +1916,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
Value *NGEP =
GEP.isInBounds()
- ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices)
- : Builder->CreateGEP(nullptr, Operand, NewIndices);
+ ? Builder.CreateInBoundsGEP(nullptr, Operand, NewIndices)
+ : Builder.CreateGEP(nullptr, Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return replaceInstUsesWith(GEP, NGEP);
@@ -2166,8 +2166,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
// free undef -> unreachable.
if (isa<UndefValue>(Op)) {
// Insert a new store to null because we cannot modify the CFG here.
- Builder->CreateStore(ConstantInt::getTrue(FI.getContext()),
- UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
+ Builder.CreateStore(ConstantInt::getTrue(FI.getContext()),
+ UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
return eraseInstFromFunction(FI);
}
@@ -2281,8 +2281,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
// the backend should extend back to a legal type for the target.
if (NewWidth > 0 && NewWidth < Known.getBitWidth()) {
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
- Builder->SetInsertPoint(&SI);
- Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc");
+ Builder.SetInsertPoint(&SI);
+ Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
SI.setCondition(NewCond);
for (auto Case : SI.cases()) {
@@ -2339,8 +2339,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// %E = insertvalue { i32 } %X, i32 42, 0
// by switching the order of the insert and extract (though the
// insertvalue should be left in, since it may have other uses).
- Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
- EV.getIndices());
+ Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
+ EV.getIndices());
return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
makeArrayRef(insi, inse));
}
@@ -2415,17 +2415,17 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// extractvalue has integer indices, getelementptr has Value*s. Convert.
SmallVector<Value*, 4> Indices;
// Prefix an i32 0 since we need the first element.
- Indices.push_back(Builder->getInt32(0));
+ Indices.push_back(Builder.getInt32(0));
for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
I != E; ++I)
- Indices.push_back(Builder->getInt32(*I));
+ Indices.push_back(Builder.getInt32(*I));
// We need to insert these at the location of the old load, not at that of
// the extractvalue.
- Builder->SetInsertPoint(L);
- Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
- L->getPointerOperand(), Indices);
- Instruction *NL = Builder->CreateLoad(GEP);
+ Builder.SetInsertPoint(L);
+ Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
+ L->getPointerOperand(), Indices);
+ Instruction *NL = Builder.CreateLoad(GEP);
// Whatever aliasing information we had for the original load must also
// hold for the smaller load, so propagate the annotations.
AAMDNodes Nodes;
@@ -2922,8 +2922,8 @@ bool InstCombiner::run() {
}
// Now that we have an instruction, try combining it to simplify it.
- Builder->SetInsertPoint(I);
- Builder->SetCurrentDebugLocation(I->getDebugLoc());
+ Builder.SetInsertPoint(I);
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
#ifndef NDEBUG
std::string OrigI;
@@ -3160,7 +3160,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
- InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines,
+ InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines,
AA, AC, TLI, DT, DL, LI);
IC.MaxArraySizeForCombine = MaxArraySize;
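A worked illustration of the identity behind tryFactorization, reduced to plain integers; the IR version matches any opcode pair that distributes, not just mul over add:

#include <cassert>

// "(A*B)+(A*D)" -> "A*(B+D)": the factored form evaluates one multiply
// instead of two whenever the inner add does not simplify away.
int expanded(int A, int B, int D) { return A * B + A * D; }
int factored(int A, int B, int D) { return A * (B + D); }

int main() {
  for (int A = -3; A <= 3; ++A)
    for (int B = -3; B <= 3; ++B)
      for (int D = -3; D <= 3; ++D)
        assert(expanded(A, B, D) == factored(A, B, D));
  return 0;
}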
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 7eea44d6aca03..184940b7ea583 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1230,7 +1230,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
// dyn_cast as we might get UndefValue
if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
- if (Masked->isNullValue())
+ if (Masked->isZero())
// Mask is constant false, so no instrumentation needed.
continue;
// If we have a true or undef value, fall through to doInstrumentAddress
diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h
index 3802f9fbf7dbe..16e2e6b4e7304 100644
--- a/lib/Transforms/Instrumentation/CFGMST.h
+++ b/lib/Transforms/Instrumentation/CFGMST.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
+#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
+
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -24,10 +27,10 @@
#include <utility>
#include <vector>
-namespace llvm {
-
#define DEBUG_TYPE "cfgmst"
+namespace llvm {
+
/// \brief A union-find based Minimum Spanning Tree for CFG
///
/// Implements a Union-find algorithm to compute Minimum Spanning Tree
@@ -220,5 +223,8 @@ public:
}
};
-#undef DEBUG_TYPE // "cfgmst"
} // end namespace llvm
+
+#undef DEBUG_TYPE // "cfgmst"
+
+#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 9c14b0149fdc1..db8fa89779479 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -112,7 +112,7 @@ cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
cl::desc("Do counter register promotion"),
cl::init(false));
cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
- cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(10),
+ cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
cl::desc("Max number counter promotions per loop to avoid"
" increasing register pressure too much"));
@@ -121,10 +121,21 @@ cl::opt<int>
MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
cl::desc("Max number of allowed counter promotions"));
-cl::opt<bool> SpeculativeCounterPromotion(
- cl::ZeroOrMore, "speculative-counter-promotion", cl::init(false),
- cl::desc("Allow counter promotion for loops with multiple exiting blocks "
- " or top-tested loops. "));
+cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
+ cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
+ cl::desc("The max number of exiting blocks of a loop to allow "
+ " speculative counter promotion"));
+
+cl::opt<bool> SpeculativeCounterPromotionToLoop(
+ cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
+ cl::desc("When the option is false, if the target block is in a loop, "
+ "the promotion will be disallowed unless the promoted counter "
+ " update can be further/iteratively promoted into an acyclic "
+ " region."));
+
+cl::opt<bool> IterativeCounterPromotion(
+ cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
+ cl::desc("Allow counter promotion across the whole loop nest."));
class InstrProfilingLegacyPass : public ModulePass {
InstrProfiling InstrProf;
@@ -150,6 +161,7 @@ public:
}
};
+///
/// A helper class to promote one counter RMW operation in the loop
/// into register update.
///
@@ -158,16 +170,19 @@ public:
///
class PGOCounterPromoterHelper : public LoadAndStorePromoter {
public:
- PGOCounterPromoterHelper(Instruction *L, Instruction *S, SSAUpdater &SSA,
- Value *Init, BasicBlock *PH,
- ArrayRef<BasicBlock *> ExitBlocks,
- ArrayRef<Instruction *> InsertPts)
+ PGOCounterPromoterHelper(
+ Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
+ BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<Instruction *> InsertPts,
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
+ LoopInfo &LI)
: LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
- InsertPts(InsertPts) {
+ InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
assert(isa<LoadInst>(L));
assert(isa<StoreInst>(S));
SSA.AddAvailableValue(PH, Init);
}
+
void doExtraRewritesBeforeFinalDeletion() const override {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
@@ -179,12 +194,21 @@ public:
Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
IRBuilder<> Builder(InsertPos);
if (AtomicCounterUpdatePromoted)
+ // Atomic updates can currently only be promoted across the current
+ // loop, not the whole loop nest.
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
AtomicOrdering::SequentiallyConsistent);
else {
LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
- Builder.CreateStore(NewVal, Addr);
+ auto *NewStore = Builder.CreateStore(NewVal, Addr);
+
+ // Now update the parent loop's candidate list:
+ if (IterativeCounterPromotion) {
+ auto *TargetLoop = LI.getLoopFor(ExitBlock);
+ if (TargetLoop)
+ LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
+ }
}
}
}
@@ -193,6 +217,8 @@ private:
Instruction *Store;
ArrayRef<BasicBlock *> ExitBlocks;
ArrayRef<Instruction *> InsertPts;
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
+ LoopInfo &LI;
};
/// A helper class to do register promotion for all profile counter
@@ -200,12 +226,15 @@ private:
///
class PGOCounterPromoter {
public:
- PGOCounterPromoter(ArrayRef<LoadStorePair> Cands, Loop &Loop)
- : Candidates(Cands), ExitBlocks(), InsertPts(), ParentLoop(Loop) {
+ PGOCounterPromoter(
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
+ Loop &CurLoop, LoopInfo &LI)
+ : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
+ LI(LI) {
SmallVector<BasicBlock *, 8> LoopExitBlocks;
SmallPtrSet<BasicBlock *, 8> BlockSet;
- ParentLoop.getExitBlocks(LoopExitBlocks);
+ L.getExitBlocks(LoopExitBlocks);
for (BasicBlock *ExitBlock : LoopExitBlocks) {
if (BlockSet.insert(ExitBlock).second) {
@@ -216,55 +245,97 @@ public:
}
bool run(int64_t *NumPromoted) {
- // We can't insert into a catchswitch.
- bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
- return isa<CatchSwitchInst>(Exit->getTerminator());
- });
-
- if (HasCatchSwitch)
- return false;
-
- if (!ParentLoop.hasDedicatedExits())
- return false;
-
- BasicBlock *PH = ParentLoop.getLoopPreheader();
- if (!PH)
- return false;
-
- BasicBlock *H = ParentLoop.getHeader();
- bool TopTested =
- ((ParentLoop.getBlocks().size() > 1) && ParentLoop.isLoopExiting(H));
- if (!SpeculativeCounterPromotion &&
- (TopTested || ParentLoop.getExitingBlock() == nullptr))
+ unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
+ if (MaxProm == 0)
return false;
unsigned Promoted = 0;
- for (auto &Cand : Candidates) {
+ for (auto &Cand : LoopToCandidates[&L]) {
SmallVector<PHINode *, 4> NewPHIs;
SSAUpdater SSA(&NewPHIs);
Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
+
PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
- PH, ExitBlocks, InsertPts);
+ L.getLoopPreheader(), ExitBlocks,
+ InsertPts, LoopToCandidates, LI);
Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
Promoted++;
- if (Promoted >= MaxNumOfPromotionsPerLoop)
+ if (Promoted >= MaxProm)
break;
+
(*NumPromoted)++;
if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
break;
}
DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
- << ParentLoop.getLoopDepth() << ")\n");
+ << L.getLoopDepth() << ")\n");
return Promoted != 0;
}
private:
- ArrayRef<LoadStorePair> Candidates;
+ bool allowSpeculativeCounterPromotion(Loop *LP) {
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+ // Not considered speculative.
+ if (ExitingBlocks.size() == 1)
+ return true;
+ if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
+ return false;
+ return true;
+ }
+
+ // Returns the max number of Counter Promotions for LP.
+ unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
+ // We can't insert into a catchswitch.
+ SmallVector<BasicBlock *, 8> LoopExitBlocks;
+ LP->getExitBlocks(LoopExitBlocks);
+ if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
+ return isa<CatchSwitchInst>(Exit->getTerminator());
+ }))
+ return 0;
+
+ if (!LP->hasDedicatedExits())
+ return 0;
+
+ BasicBlock *PH = LP->getLoopPreheader();
+ if (!PH)
+ return 0;
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ LP->getExitingBlocks(ExitingBlocks);
+ // Not considered speculative.
+ if (ExitingBlocks.size() == 1)
+ return MaxNumOfPromotionsPerLoop;
+
+ if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
+ return 0;
+
+ // It does not matter whether the target block is in a loop:
+ if (SpeculativeCounterPromotionToLoop)
+ return MaxNumOfPromotionsPerLoop;
+
+ // Now check the target block:
+ unsigned MaxProm = MaxNumOfPromotionsPerLoop;
+ for (auto *TargetBlock : LoopExitBlocks) {
+ auto *TargetLoop = LI.getLoopFor(TargetBlock);
+ if (!TargetLoop)
+ continue;
+ unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
+ unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
+ MaxProm =
+ std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
+ PendingCandsInTarget);
+ }
+ return MaxProm;
+ }
+
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
SmallVector<BasicBlock *, 8> ExitBlocks;
SmallVector<Instruction *, 8> InsertPts;
- Loop &ParentLoop;
+ Loop &L;
+ LoopInfo &LI;
};
} // end anonymous namespace
@@ -349,8 +420,10 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
- for (auto *Loop : Loops) {
- PGOCounterPromoter Promoter(LoopPromotionCandidates[Loop], *Loop);
+ // Do a post-order traversal of the loops so that counter updates can be
+ // iteratively hoisted outside the loop nest.
+ for (auto *Loop : llvm::reverse(Loops)) {
+ PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
Promoter.run(&TotalCountersPromoted);
}
}
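A standalone restatement of the budget computation in getMaxNumOfPromotionsInLoop, with plain unsigned integers standing in for the real counters (remainingBudget is a hypothetical name): a promotion into an enclosing loop is allowed only up to the room that loop has left after its own pending candidates.

#include <algorithm>
#include <cassert>

unsigned remainingBudget(unsigned MaxProm, unsigned MaxPromForTarget,
                         unsigned PendingCandsInTarget) {
  // min(current budget, target's budget minus what is already queued there);
  // the max() clamps the unsigned subtraction at zero.
  return std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
                               PendingCandsInTarget);
}

int main() {
  assert(remainingBudget(20, 10, 4) == 6); // 6 slots left in the target loop
  assert(remainingBudget(20, 3, 5) == 0);  // target already over budget
  return 0;
}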
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index 363539b2886f3..4eb758c69c581 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
-#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H
+#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/IR/BasicBlock.h"
@@ -108,4 +108,4 @@ namespace llvm {
} // End llvm namespace
-#endif
+#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index df4ee9969c02f..1348e0ed0ed00 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2918,8 +2918,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ClDumpStrictInstructions)
dumpInst(I);
DEBUG(dbgs() << "DEFAULT: " << I << "\n");
- for (size_t i = 0, n = I.getNumOperands(); i < n; i++)
- insertShadowCheck(I.getOperand(i), &I);
+ for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
+ Value *Operand = I.getOperand(i);
+ if (Operand->getType()->isSized())
+ insertShadowCheck(Operand, &I);
+ }
setShadow(&I, getCleanShadow(&I));
setOrigin(&I, getCleanOrigin());
}
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 0e7d11c553977..8e4bfc0b91bc5 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -224,7 +224,7 @@ std::string getBranchCondString(Instruction *TI) {
OS << "_Zero";
else if (CV->isOne())
OS << "_One";
- else if (CV->isAllOnesValue())
+ else if (CV->isMinusOne())
OS << "_MinusOne";
else
OS << "_Const";
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index a991792bf5a39..ec6904486e109 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -379,10 +379,11 @@ void ThreadSanitizer::chooseInstructionsToInstrument(
}
static bool isAtomic(Instruction *I) {
+ // TODO: Ask TTI whether synchronization scope is between threads.
if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->isAtomic() && LI->getSynchScope() == CrossThread;
+ return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread;
if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->isAtomic() && SI->getSynchScope() == CrossThread;
+ return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread;
if (isa<AtomicRMWInst>(I))
return true;
if (isa<AtomicCmpXchgInst>(I))
@@ -676,7 +677,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
I->eraseFromParent();
} else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
- Function *F = FI->getSynchScope() == SingleThread ?
+ Function *F = FI->getSyncScopeID() == SyncScope::SingleThread ?
TsanAtomicSignalFence : TsanAtomicThreadFence;
CallInst *C = CallInst::Create(F, Args);
ReplaceInstWithInst(I, C);
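Restating the updated predicate as a self-contained sketch (isCrossThreadAtomic is an illustrative name, not the function in this file): with SynchronizationScope gone, "cross-thread" becomes "any sync scope other than single-thread".

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static bool isCrossThreadAtomic(const Instruction *I) {
  // Single-thread atomics only order against signal handlers on the same
  // thread, so TSan has no cross-thread race to report for them.
  if (auto *LI = dyn_cast<LoadInst>(I))
    return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread;
  if (auto *SI = dyn_cast<StoreInst>(I))
    return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread;
  return isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I);
}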
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index a49c9b68c97d0..122c9314e022a 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -44,6 +44,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <tuple>
using namespace llvm;
@@ -55,7 +56,7 @@ STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
STATISTIC(NumConstantsRebased, "Number of constants rebased");
static cl::opt<bool> ConstHoistWithBlockFrequency(
- "consthoist-with-block-frequency", cl::init(false), cl::Hidden,
+ "consthoist-with-block-frequency", cl::init(true), cl::Hidden,
cl::desc("Enable the use of the block frequency analysis to reduce the "
"chance to execute const materialization more frequently than "
"without hoisting."));
@@ -231,7 +232,8 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
// Return the optimal insert points in BBs.
if (Node == Entry) {
BBs.clear();
- if (InsertPtsFreq > BFI.getBlockFreq(Node))
+ if (InsertPtsFreq > BFI.getBlockFreq(Node) ||
+ (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1))
BBs.insert(Entry);
else
BBs.insert(InsertPts.begin(), InsertPts.end());
@@ -244,7 +246,15 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first;
BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;
// Choose to insert in Node or in subtree of Node.
- if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) {
+ // Don't hoist to EHPad because we may not find a proper place to insert
+ // in EHPad.
+ // If the total frequency of InsertPts is the same as the frequency of the
+ // target Node, and InsertPts contains more than one node, choose hoisting
+ // to reduce code size.
+ if (NodeInBBs ||
+ (!Node->isEHPad() &&
+ (InsertPtsFreq > BFI.getBlockFreq(Node) ||
+ (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) {
ParentInsertPts.insert(Node);
ParentPtsFreq += BFI.getBlockFreq(Node);
} else {
@@ -392,42 +402,15 @@ void ConstantHoistingPass::collectConstantCandidates(
if (Inst->isCast())
return;
- // Can't handle inline asm. Skip it.
- if (auto Call = dyn_cast<CallInst>(Inst))
- if (isa<InlineAsm>(Call->getCalledValue()))
- return;
-
- // Switch cases must remain constant, and if the value being tested is
- // constant the entire thing should disappear.
- if (isa<SwitchInst>(Inst))
- return;
-
- // Static allocas (constant size in the entry block) are handled by
- // prologue/epilogue insertion so they're free anyway. We definitely don't
- // want to make them non-constant.
- auto AI = dyn_cast<AllocaInst>(Inst);
- if (AI && AI->isStaticAlloca())
- return;
-
- // Constants in GEPs that index into a struct type should not be hoisted.
- if (isa<GetElementPtrInst>(Inst)) {
- gep_type_iterator GTI = gep_type_begin(Inst);
-
- // Collect constant for first operand.
- collectConstantCandidates(ConstCandMap, Inst, 0);
- // Scan rest operands.
- for (unsigned Idx = 1, E = Inst->getNumOperands(); Idx != E; ++Idx, ++GTI) {
- // Only collect constants that index into a non struct type.
- if (!GTI.isStruct()) {
- collectConstantCandidates(ConstCandMap, Inst, Idx);
- }
- }
- return;
- }
-
// Scan all operands.
for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
- collectConstantCandidates(ConstCandMap, Inst, Idx);
+ // The cost of materializing the constants (defined in
+ // `TargetTransformInfo::getIntImmCost`) for instructions which only take
+ // constant variables is lower than `TargetTransformInfo::TCC_Basic`. So
+ // it's safe for us to collect constant candidates from all IntrinsicInsts.
+ if (canReplaceOperandWithVariable(Inst, Idx) || isa<IntrinsicInst>(Inst)) {
+ collectConstantCandidates(ConstCandMap, Inst, Idx);
+ }
} // end of for all operands
}
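The new tie-break in findBestInsertionSet, reduced to its decision function with plain integers standing in for BlockFrequency (preferDominatorNode is a hypothetical name): hoisting to the dominating node wins when it is strictly cheaper, or equally cheap while replacing several materializations with one.

#include <cassert>
#include <cstdint>

bool preferDominatorNode(uint64_t InsertPtsFreq, uint64_t NodeFreq,
                         size_t NumInsertPts) {
  // Strictly cheaper at the dominator, or same cost but fewer copies.
  return InsertPtsFreq > NodeFreq ||
         (InsertPtsFreq == NodeFreq && NumInsertPts > 1);
}

int main() {
  assert(preferDominatorNode(12, 10, 2));  // cheaper when hoisted
  assert(preferDominatorNode(10, 10, 2));  // equal cost, smaller code
  assert(!preferDominatorNode(10, 10, 1)); // nothing gained by hoisting
  return 0;
}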
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 0f92760a874b5..7fd77a082b822 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -670,7 +670,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (auto *KnownCond = AvailableValues.lookup(CondI)) {
// Is the condition known to be true?
if (isa<ConstantInt>(KnownCond) &&
- cast<ConstantInt>(KnownCond)->isOneValue()) {
+ cast<ConstantInt>(KnownCond)->isOne()) {
DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n');
removeMSSA(Inst);
Inst->eraseFromParent();
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c0f628eb61e61..0fe72f3f73318 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -80,10 +80,9 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
struct llvm::GVN::Expression {
uint32_t opcode;
Type *type;
- bool commutative;
SmallVector<uint32_t, 4> varargs;
- Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {}
+ Expression(uint32_t o = ~2U) : opcode(o) {}
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
@@ -247,7 +246,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
- e.commutative = true;
}
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
@@ -258,7 +256,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (C->getOpcode() << 8) | Predicate;
- e.commutative = true;
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -284,7 +281,6 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (Opcode << 8) | Predicate;
- e.commutative = true;
return e;
}
@@ -352,25 +348,25 @@ GVN::ValueTable::~ValueTable() = default;
/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
- if (PHINode *PN = dyn_cast<PHINode>(V))
- NumberingPhi[num] = PN;
}
uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
- uint32_t e = assignExpNewValueNum(exp).first;
+ uint32_t &e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = createExpr(C);
- auto ValNum = assignExpNewValueNum(exp);
- if (ValNum.second) {
- valueNumbering[C] = ValNum.first;
- return ValNum.first;
+ uint32_t &e = expressionNumbering[exp];
+ if (!e) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
}
if (!MD) {
- uint32_t e = assignExpNewValueNum(exp).first;
+ e = nextValueNumber++;
valueNumbering[C] = e;
return e;
}
@@ -526,29 +522,23 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
case Instruction::ExtractValue:
exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
break;
- case Instruction::PHI:
- valueNumbering[V] = nextValueNumber;
- NumberingPhi[nextValueNumber] = cast<PHINode>(V);
- return nextValueNumber++;
default:
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
- uint32_t e = assignExpNewValueNum(exp).first;
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
valueNumbering[V] = e;
return e;
}
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
+uint32_t GVN::ValueTable::lookup(Value *V) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
- if (Verify) {
- assert(VI != valueNumbering.end() && "Value not numbered?");
- return VI->second;
- }
- return (VI != valueNumbering.end()) ? VI->second : 0;
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
}
/// Returns the value number of the given comparison,
@@ -559,28 +549,21 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS) {
Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
- return assignExpNewValueNum(exp).first;
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ return e;
}
/// Remove all entries from the ValueTable.
void GVN::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
- NumberingPhi.clear();
- PhiTranslateTable.clear();
nextValueNumber = 1;
- Expressions.clear();
- ExprIdx.clear();
- nextExprNumber = 0;
}
/// Remove a value from the value numbering.
void GVN::ValueTable::erase(Value *V) {
- uint32_t Num = valueNumbering.lookup(V);
valueNumbering.erase(V);
- // If V is PHINode, V <--> value number is an one-to-one mapping.
- if (isa<PHINode>(V))
- NumberingPhi.erase(Num);
}
/// verifyRemoved - Verify that the value is removed from all internal data
@@ -1183,7 +1166,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre",
LI->isVolatile(), LI->getAlignment(),
- LI->getOrdering(), LI->getSynchScope(),
+ LI->getOrdering(), LI->getSyncScopeID(),
UnavailablePred->getTerminator());
// Transfer the old load's AA tags to the new load.
@@ -1219,7 +1202,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
V->takeName(LI);
if (Instruction *I = dyn_cast<Instruction>(V))
I->setDebugLoc(LI->getDebugLoc());
- if (V->getType()->getScalarType()->isPointerTy())
+ if (V->getType()->isPtrOrPtrVectorTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI)
@@ -1306,7 +1289,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// to propagate LI's DebugLoc because LI may not post-dominate I.
if (LI->getDebugLoc() && LI->getParent() == I->getParent())
I->setDebugLoc(LI->getDebugLoc());
- if (V->getType()->getScalarType()->isPointerTy())
+ if (V->getType()->isPtrOrPtrVectorTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
++NumGVNLoad;
@@ -1460,7 +1443,7 @@ bool GVN::processLoad(LoadInst *L) {
reportLoadElim(L, AvailableValue, ORE);
// Tell MDA to rexamine the reused pointer since we might have more
// information after forwarding it.
- if (MD && AvailableValue->getType()->getScalarType()->isPointerTy())
+ if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
MD->invalidateCachedPointerInfo(AvailableValue);
return true;
}
@@ -1468,95 +1451,6 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
-/// Return a pair the first field showing the value number of \p Exp and the
-/// second field showing whether it is a value number newly created.
-std::pair<uint32_t, bool>
-GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
- uint32_t &e = expressionNumbering[Exp];
- bool CreateNewValNum = !e;
- if (CreateNewValNum) {
- Expressions.push_back(Exp);
- if (ExprIdx.size() < nextValueNumber + 1)
- ExprIdx.resize(nextValueNumber * 2);
- e = nextValueNumber;
- ExprIdx[nextValueNumber++] = nextExprNumber++;
- }
- return {e, CreateNewValNum};
-}
-
-/// Return whether all the values related with the same \p num are
-/// defined in \p BB.
-bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
- GVN &Gvn) {
- LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
- while (Vals && Vals->BB == BB)
- Vals = Vals->Next;
- return !Vals;
-}
-
-/// Wrap phiTranslateImpl to provide caching functionality.
-uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
- const BasicBlock *PhiBlock, uint32_t Num,
- GVN &Gvn) {
- auto FindRes = PhiTranslateTable.find({Num, Pred});
- if (FindRes != PhiTranslateTable.end())
- return FindRes->second;
- uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
- PhiTranslateTable.insert({{Num, Pred}, NewNum});
- return NewNum;
-}
-
-/// Translate value number \p Num using phis, so that it has the values of
-/// the phis in BB.
-uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
- const BasicBlock *PhiBlock,
- uint32_t Num, GVN &Gvn) {
- if (PHINode *PN = NumberingPhi[Num]) {
- for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
- if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
- if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
- return TransVal;
- }
- return Num;
- }
-
- // If there is any value related with Num is defined in a BB other than
- // PhiBlock, it cannot depend on a phi in PhiBlock without going through
- // a backedge. We can do an early exit in that case to save compile time.
- if (!areAllValsInBB(Num, PhiBlock, Gvn))
- return Num;
-
- if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
- return Num;
- Expression Exp = Expressions[ExprIdx[Num]];
-
- for (unsigned i = 0; i < Exp.varargs.size(); i++) {
- // For InsertValue and ExtractValue, some varargs are index numbers
- // instead of value numbers. Those index numbers should not be
- // translated.
- if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
- (i > 0 && Exp.opcode == Instruction::ExtractValue))
- continue;
- Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
- }
-
- if (Exp.commutative) {
- assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
- if (Exp.varargs[0] > Exp.varargs[1]) {
- std::swap(Exp.varargs[0], Exp.varargs[1]);
- uint32_t Opcode = Exp.opcode >> 8;
- if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
- Exp.opcode = (Opcode << 8) |
- CmpInst::getSwappedPredicate(
- static_cast<CmpInst::Predicate>(Exp.opcode & 255));
- }
- }
-
- if (uint32_t NewNum = expressionNumbering[Exp])
- return NewNum;
- return Num;
-}
-
// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
@@ -1601,15 +1495,6 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
-
-void GVN::assignBlockRPONumber(Function &F) {
- uint32_t NextBlockNumber = 1;
- ReversePostOrderTraversal<Function *> RPOT(&F);
- for (BasicBlock *BB : RPOT)
- BlockRPONumber[BB] = NextBlockNumber++;
-}
-
-
// Tries to replace instruction with const, using information from
// ReplaceWithConstMap.
bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
@@ -1713,7 +1598,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
// RHS neither 'true' nor 'false' - bail out.
continue;
// Whether RHS equals 'true'. Otherwise it equals 'false'.
- bool isKnownTrue = CI->isAllOnesValue();
+ bool isKnownTrue = CI->isMinusOne();
bool isKnownFalse = !isKnownTrue;
// If "A && B" is known true then both A and B are known true. If "A || B"
@@ -1813,7 +1698,7 @@ bool GVN::processInstruction(Instruction *I) {
Changed = true;
}
if (Changed) {
- if (MD && V->getType()->getScalarType()->isPointerTy())
+ if (MD && V->getType()->isPtrOrPtrVectorTy())
MD->invalidateCachedPointerInfo(V);
++NumGVNSimpl;
return true;
@@ -1924,7 +1809,7 @@ bool GVN::processInstruction(Instruction *I) {
// Remove it!
patchAndReplaceAllUsesWith(I, Repl);
- if (MD && Repl->getType()->getScalarType()->isPointerTy())
+ if (MD && Repl->getType()->isPtrOrPtrVectorTy())
MD->invalidateCachedPointerInfo(Repl);
markInstructionForDeletion(I);
return true;
@@ -1971,7 +1856,6 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// Fabricate val-num for dead-code in order to suppress assertion in
// performPRE().
assignValNumForDeadCode();
- assignBlockRPONumber(F);
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -2043,7 +1927,7 @@ bool GVN::processBlock(BasicBlock *BB) {
// Instantiate an expression in a predecessor that lacked it.
bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
- BasicBlock *Curr, unsigned int ValNo) {
+ unsigned int ValNo) {
// Because we are going top-down through the block, all value numbers
// will be available in the predecessor by the time we need them. Any
// that weren't originally present will have been instantiated earlier
@@ -2061,9 +1945,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
success = false;
break;
}
- uint32_t TValNo =
- VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
- if (Value *V = findLeader(Pred, TValNo)) {
+ if (Value *V = findLeader(Pred, VN.lookup(Op))) {
Instr->setOperand(i, V);
} else {
success = false;
@@ -2080,12 +1962,10 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
Instr->insertBefore(Pred->getTerminator());
Instr->setName(Instr->getName() + ".pre");
Instr->setDebugLoc(Instr->getDebugLoc());
-
- unsigned Num = VN.lookupOrAdd(Instr);
- VN.add(Instr, Num);
+ VN.add(Instr, ValNo);
// Update the availability map to include the new instruction.
- addToLeaderTable(Num, Instr, Pred);
+ addToLeaderTable(ValNo, Instr, Pred);
return true;
}
@@ -2123,27 +2003,18 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
for (BasicBlock *P : predecessors(CurrentBlock)) {
- // We're not interested in PRE where blocks with predecessors that are
- // not reachable.
- if (!DT->isReachableFromEntry(P)) {
+ // We're not interested in PRE where the block is its
+ // own predecessor, or in blocks with predecessors
+ // that are not reachable.
+ if (P == CurrentBlock) {
NumWithout = 2;
break;
- }
- // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and
- // when CurInst has operand defined in CurrentBlock (so it may be defined
- // by phi in the loop header).
- if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] &&
- any_of(CurInst->operands(), [&](const Use &U) {
- if (auto *Inst = dyn_cast<Instruction>(U.get()))
- return Inst->getParent() == CurrentBlock;
- return false;
- })) {
+ } else if (!DT->isReachableFromEntry(P)) {
NumWithout = 2;
break;
}
- uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
- Value *predV = findLeader(P, TValNo);
+ Value *predV = findLeader(P, ValNo);
if (!predV) {
predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
@@ -2183,7 +2054,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
}
// We need to insert somewhere, so let's give it a shot
PREInstr = CurInst->clone();
- if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
+ if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) {
// If we failed insertion, make sure we remove the instruction.
DEBUG(verifyRemoved(PREInstr));
PREInstr->deleteValue();
@@ -2212,7 +2083,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
addToLeaderTable(ValNo, Phi, CurrentBlock);
Phi->setDebugLoc(CurInst->getDebugLoc());
CurInst->replaceAllUsesWith(Phi);
- if (MD && Phi->getType()->getScalarType()->isPointerTy())
+ if (MD && Phi->getType()->isPtrOrPtrVectorTy())
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
@@ -2297,7 +2168,6 @@ bool GVN::iterateOnFunction(Function &F) {
void GVN::cleanupGlobalSets() {
VN.clear();
LeaderTable.clear();
- BlockRPONumber.clear();
TableAllocator.Reset();
}
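For orientation, a minimal sketch (illustrative C++, not from this patch) of the partial redundancy that performScalarPRE targets: an expression available along one predecessor but not the other is cloned into the predecessor that lacks it, and the results are merged with a phi.

    int example(int a, int b, bool cond) {
      int x = 0;
      if (cond)
        x = a + b;        // 'a + b' is available on this path only
      // 'a + b' below is partially redundant: PRE inserts a copy of it into
      // the other predecessor and replaces this use with a phi of the two.
      return x + (a + b);
    }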
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 3c8fbd35bf8c1..89b28f0aeee6b 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -232,7 +232,7 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:{
const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
- if (!IsVolatile || !IsVolatile->isNullValue())
+ if (!IsVolatile || !IsVolatile->isZero())
return false;
LLVM_FALLTHROUGH;
@@ -358,7 +358,8 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
// If the operands of the expression on the top are already explored,
// adds that expression to the resultant postorder.
if (PostorderStack.back().second) {
- Postorder.push_back(TopVal);
+ if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
+ Postorder.push_back(TopVal);
PostorderStack.pop_back();
continue;
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 05293eb0079fc..ee3de51b13606 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1212,7 +1212,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
LoadInst *NewVal = new LoadInst(
LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(),
- LI->getSynchScope(), UnavailablePred->getTerminator());
+ LI->getSyncScopeID(), UnavailablePred->getTerminator());
NewVal->setDebugLoc(LI->getDebugLoc());
if (AATags)
NewVal->setAAMetadata(AATags);
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index c41cc42db5e2c..ac4dd44a0e906 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -148,25 +148,27 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
LoopInfo &LI, LPMUpdater *Updater = nullptr) {
assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
- // We can only remove the loop if there is a preheader that we can
- // branch from after removing it.
+ // We can only remove the loop if there is a preheader that we can branch from
+ // after removing it. Also, if LoopSimplify form is not available, stay out
+ // of trouble.
BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader)
+ if (!Preheader || !L->hasDedicatedExits()) {
+ DEBUG(dbgs()
+ << "Deletion requires Loop with preheader and dedicated exits.\n");
return false;
-
- // If LoopSimplify form is not available, stay out of trouble.
- if (!L->hasDedicatedExits())
- return false;
-
+ }
// We can't remove loops that contain subloops. If the subloops were dead,
// they would already have been removed in earlier executions of this pass.
- if (L->begin() != L->end())
+ if (L->begin() != L->end()) {
+ DEBUG(dbgs() << "Loop contains subloops.\n");
return false;
+ }
BasicBlock *ExitBlock = L->getUniqueExitBlock();
if (ExitBlock && isLoopNeverExecuted(L)) {
+ DEBUG(dbgs() << "Loop is proven to never execute, delete it!");
// Set incoming value to undef for phi nodes in the exit block.
BasicBlock::iterator BI = ExitBlock->begin();
while (PHINode *P = dyn_cast<PHINode>(BI)) {
@@ -188,20 +190,26 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
// be in the situation of needing to be able to solve statically which exit
// block will be branched to, or trying to preserve the branching logic in
// a loop invariant manner.
- if (!ExitBlock)
+ if (!ExitBlock) {
+ DEBUG(dbgs() << "Deletion requires single exit block\n");
return false;
-
+ }
// Finally, we have to check that the loop really is dead.
bool Changed = false;
- if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader))
+ if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) {
+ DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n");
return Changed;
+ }
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(S))
+ if (isa<SCEVCouldNotCompute>(S)) {
+ DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n");
return Changed;
+ }
+ DEBUG(dbgs() << "Loop is invariant, delete it!");
deleteDeadLoop(L, DT, SE, LI, Updater);
++NumDeleted;
@@ -311,6 +319,9 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &Updater) {
+
+ DEBUG(dbgs() << "Analyzing Loop for deletion: ");
+ DEBUG(L.dump());
if (!deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, &Updater))
return PreservedAnalyses::all();
@@ -350,5 +361,7 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) {
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DEBUG(dbgs() << "Analyzing Loop for deletion: ");
+ DEBUG(L->dump());
return deleteLoopIfDead(L, DT, SE, LI);
}
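As an illustration of what the new debug output reports on, a hedged sketch of a loop this pass deletes: nothing computed inside is used afterwards, the loop has a preheader and dedicated exits, and the trip count is computable.

    void f(int n) {
      int s = 0;
      for (int i = 0; i < n; ++i)  // dead: 's' is never read after the loop,
        s += i;                    // so deleteLoopIfDead removes it entirely
    }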
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 8b435050ac769..4a6a35c0ab1b9 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1160,7 +1160,7 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
if (!Dec ||
!((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
(SubInst->getOpcode() == Instruction::Add &&
- Dec->isAllOnesValue()))) {
+ Dec->isMinusOne()))) {
return false;
}
}
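The isMinusOne() call above matches the 'add -1' form of the decrement in the classic bit-clearing popcount idiom; roughly (an illustrative sketch):

    int popcount_idiom(unsigned x) {
      int count = 0;
      while (x) {
        x &= x - 1;  // clears the lowest set bit; the x - 1 may appear as
        ++count;     // 'add x, -1', which the isMinusOne() check recognizes
      }
      return count;  // the whole loop becomes a single ctpop intrinsic
    }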
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 9f3875a3027f4..606136dc31a4b 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -757,8 +757,11 @@ bool LoopInterchangeLegality::currentLimitations() {
PHINode *InnerInductionVar;
SmallVector<PHINode *, 8> Inductions;
SmallVector<PHINode *, 8> Reductions;
- if (!findInductionAndReductions(InnerLoop, Inductions, Reductions))
+ if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {
+ DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes "
+ << "are supported currently.\n");
return true;
+ }
// TODO: Currently we handle only loops with 1 induction variable.
if (Inductions.size() != 1) {
@@ -771,16 +774,25 @@ bool LoopInterchangeLegality::currentLimitations() {
InnerInductionVar = Inductions.pop_back_val();
Reductions.clear();
- if (!findInductionAndReductions(OuterLoop, Inductions, Reductions))
+ if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {
+ DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes "
+ << "are supported currently.\n");
return true;
+ }
// Outer loop cannot have reduction because then loops will not be tightly
// nested.
- if (!Reductions.empty())
+ if (!Reductions.empty()) {
+ DEBUG(dbgs() << "Outer loops with reductions are not supported "
+ << "currently.\n");
return true;
+ }
// TODO: Currently we handle only loops with 1 induction variable.
- if (Inductions.size() != 1)
+ if (Inductions.size() != 1) {
+ DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
+ << "supported currently.\n");
return true;
+ }
// TODO: Triangular loops are not handled for now.
if (!isLoopStructureUnderstood(InnerInductionVar)) {
@@ -791,12 +803,16 @@ bool LoopInterchangeLegality::currentLimitations() {
// TODO: We only handle LCSSA PHI's corresponding to reduction for now.
BasicBlock *LoopExitBlock =
getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
- if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true))
+ if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) {
+ DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n");
return true;
+ }
LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
- if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false))
+ if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) {
+ DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
return true;
+ }
// TODO: Current limitation: Since we split the inner loop latch at the point
// were induction variable is incremented (induction.next); We cannot have
@@ -816,8 +832,11 @@ bool LoopInterchangeLegality::currentLimitations() {
InnerIndexVarInc =
dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
- if (!InnerIndexVarInc)
+ if (!InnerIndexVarInc) {
+ DEBUG(dbgs() << "Did not find an instruction to increment the induction "
+ << "variable.\n");
return true;
+ }
// Since we split the inner loop latch on this induction variable. Make sure
// we do not have any instruction between the induction variable and branch
@@ -827,19 +846,24 @@ bool LoopInterchangeLegality::currentLimitations() {
for (const Instruction &I : reverse(*InnerLoopLatch)) {
if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I))
continue;
+
// We found an instruction. If this is not the induction variable then it is not
// safe to split this loop latch.
- if (!I.isIdenticalTo(InnerIndexVarInc))
+ if (!I.isIdenticalTo(InnerIndexVarInc)) {
+ DEBUG(dbgs() << "Found unsupported instructions between induction "
+ << "variable increment and branch.\n");
return true;
+ }
FoundInduction = true;
break;
}
// The loop latch ended and we didn't find the induction variable; return as
// current limitation.
- if (!FoundInduction)
+ if (!FoundInduction) {
+ DEBUG(dbgs() << "Did not find the induction variable.\n");
return true;
-
+ }
return false;
}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 7312d97f8efe1..3506ac343d594 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -485,10 +485,22 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
DomTreeNode *Node = HeaderChildren[I];
BasicBlock *BB = Node->getBlock();
- pred_iterator PI = pred_begin(BB);
- BasicBlock *NearestDom = *PI;
- for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
- NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
+ BasicBlock *NearestDom = nullptr;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ // Consider only reachable basic blocks.
+ if (!DT->getNode(Pred))
+ continue;
+
+ if (!NearestDom) {
+ NearestDom = Pred;
+ continue;
+ }
+
+ NearestDom = DT->findNearestCommonDominator(NearestDom, Pred);
+ assert(NearestDom && "No NearestCommonDominator found");
+ }
+
+ assert(NearestDom && "Nearest dominator not found");
// Remember if this changes the DomTree.
if (Node->getIDom()->getBlock() != NearestDom) {
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 73436f13c94e4..3638da118cb7e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -140,6 +140,13 @@ static cl::opt<bool> LSRExpNarrow(
cl::desc("Narrow LSR complex solution using"
" expectation of registers number"));
+// Flag to narrow search space by filtering non-optimal formulae with
+// the same ScaledReg and Scale.
+static cl::opt<bool> FilterSameScaledReg(
+ "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
+ cl::desc("Narrow LSR search space by filtering non-optimal formulae"
+ " with the same ScaledReg and Scale"));
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -1902,6 +1909,7 @@ class LSRInstance {
void NarrowSearchSpaceByDetectingSupersets();
void NarrowSearchSpaceByCollapsingUnrolledCode();
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
void NarrowSearchSpaceByDeletingCostlyFormulas();
void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
@@ -2318,7 +2326,7 @@ LSRInstance::OptimizeLoopTermCond() {
dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
const ConstantInt *C = D->getValue();
// Stride of one or negative one can have reuse with non-addresses.
- if (C->isOne() || C->isAllOnesValue())
+ if (C->isOne() || C->isMinusOne())
goto decline_post_inc;
// Avoid weird situations.
if (C->getValue().getMinSignedBits() >= 64 ||
@@ -4306,6 +4314,104 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
}
}
+/// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
+/// pick the best one and delete the others.
+/// This narrowing heuristic is to keep as many formulae with different
+/// Scale and ScaledReg pairs as possible while narrowing the search space.
+/// The benefit is that it is more likely to find a better solution
+/// from a formula set with more Scale and ScaledReg variations than
+/// from a formula set where they are all the same. The picking-winner-reg
+/// heuristic will often keep the formulae with the same Scale and
+/// ScaledReg and filter out the others, and we want to avoid that if possible.
+void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ return;
+
+ DEBUG(dbgs() << "The search space is too complex.\n"
+ "Narrowing the search space by choosing the best Formula "
+ "from the Formulae with the same Scale and ScaledReg.\n");
+
+ // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
+ typedef DenseMap<std::pair<const SCEV *, int64_t>, size_t> BestFormulaeTy;
+ BestFormulaeTy BestFormulae;
+#ifndef NDEBUG
+ bool ChangedFormulae = false;
+#endif
+ DenseSet<const SCEV *> VisitedRegs;
+ SmallPtrSet<const SCEV *, 16> Regs;
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+
+ // Return true if Formula FA is better than Formula FB.
+ auto IsBetterThan = [&](Formula &FA, Formula &FB) {
+ // First we will try to choose the Formula with fewer new registers.
+ // For a register used by current Formula, the more the register is
+ // shared among LSRUses, the less we increase the register number
+ // counter of the formula.
+ size_t FARegNum = 0;
+ for (const SCEV *Reg : FA.BaseRegs) {
+ const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
+ FARegNum += (NumUses - UsedByIndices.count() + 1);
+ }
+ size_t FBRegNum = 0;
+ for (const SCEV *Reg : FB.BaseRegs) {
+ const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
+ FBRegNum += (NumUses - UsedByIndices.count() + 1);
+ }
+ if (FARegNum != FBRegNum)
+ return FARegNum < FBRegNum;
+
+ // If the new register numbers are the same, choose the Formula with
+ // less Cost.
+ Cost CostFA, CostFB;
+ Regs.clear();
+ CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU);
+ Regs.clear();
+ CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU);
+ return CostFA.isLess(CostFB, TTI);
+ };
+
+ bool Any = false;
+ for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
+ ++FIdx) {
+ Formula &F = LU.Formulae[FIdx];
+ if (!F.ScaledReg)
+ continue;
+ auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
+ if (P.second)
+ continue;
+
+ Formula &Best = LU.Formulae[P.first->second];
+ if (IsBetterThan(F, Best))
+ std::swap(F, Best);
+ DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
+ dbgs() << "\n"
+ " in favor of formula ";
+ Best.print(dbgs()); dbgs() << '\n');
+#ifndef NDEBUG
+ ChangedFormulae = true;
+#endif
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
+ }
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+
+ // Reset this to prepare for the next use.
+ BestFormulae.clear();
+ }
+
+ DEBUG(if (ChangedFormulae) {
+ dbgs() << "\n"
+ "After filtering out undesirable candidates:\n";
+ print_uses(dbgs());
+ });
+}
+
/// The function deletes formulas with a high expected register count.
/// Assuming we don't know the value of each formula (having already deleted
/// all inefficient ones), generate the probability of not selecting for each
@@ -4516,6 +4622,8 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ if (FilterSameScaledReg)
+ NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
if (LSRExpNarrow)
NarrowSearchSpaceByDeletingCostlyFormulas();
else
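A usage note on the new option: since FilterSameScaledReg is an ordinary cl::opt, the heuristic can be toggled from any tool that runs LSR (for example opt) with -lsr-filter-same-scaled-reg=<true|false>. It defaults to enabled, so disabling it on the command line is a convenient way to isolate its effect when benchmarking.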
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index acd3ef6791bed..6727cf0179c18 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -238,7 +238,7 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
&BB->front());
NewPN->addIncoming(Opd1, S0->getParent());
NewPN->addIncoming(Opd2, S1->getParent());
- if (MD && NewPN->getType()->getScalarType()->isPointerTy())
+ if (MD && NewPN->getType()->isPtrOrPtrVectorTy())
MD->invalidateCachedPointerInfo(NewPN);
return NewPN;
}
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 9cf01c6582b58..9d018563618ea 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -866,9 +866,7 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
// Things in TOPClass are equivalent to everything.
if (ValueToClass.lookup(*U) == TOPClass)
return false;
- if (lookupOperandLeader(*U) == PN)
- return false;
- return true;
+ return lookupOperandLeader(*U) != PN;
});
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use *U) -> Value * {
@@ -2063,9 +2061,10 @@ Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const {
//
// The invariants of this function are:
//
-// I must be moving to NewClass from OldClass The StoreCount of OldClass and
-// NewClass is expected to have been updated for I already if it is is a store.
-// The OldClass memory leader has not been updated yet if I was the leader.
+// - I must be moving to NewClass from OldClass
+// - The StoreCount of OldClass and NewClass is expected to have been updated
+// for I already if it is a store.
+// - The OldClass memory leader has not been updated yet if I was the leader.
void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I,
MemoryAccess *InstMA,
CongruenceClass *OldClass,
@@ -2074,7 +2073,8 @@ void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I,
// be the MemoryAccess of OldClass.
assert((!InstMA || !OldClass->getMemoryLeader() ||
OldClass->getLeader() != I ||
- OldClass->getMemoryLeader() == InstMA) &&
+ MemoryAccessToClass.lookup(OldClass->getMemoryLeader()) ==
+ MemoryAccessToClass.lookup(InstMA)) &&
"Representative MemoryAccess mismatch");
// First, see what happens to the new class
if (!NewClass->getMemoryLeader()) {
@@ -2136,7 +2136,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
<< NewClass->getID() << " from " << *NewClass->getLeader()
<< " to " << *SI << " because store joined class\n");
// If we changed the leader, we have to mark it changed because we don't
- // know what it will do to symbolic evlauation.
+ // know what it will do to symbolic evaluation.
NewClass->setLeader(SI);
}
// We rely on the code below handling the MemoryAccess change.
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index cdba0062953f1..29d1ba406ae49 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -2148,7 +2148,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
if (I->getOpcode() == Instruction::Mul &&
cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Ops.back().Op) &&
- cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
+ cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
ValueEntry Tmp = Ops.pop_back_val();
Ops.insert(Ops.begin(), Tmp);
} else if (I->getOpcode() == Instruction::FMul &&
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index a73e9aec06170..f19d45329d238 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1994,7 +1994,7 @@ static void rematerializeLiveValues(CallSite CS,
Instruction *LastClonedValue = nullptr;
Instruction *LastValue = nullptr;
for (Instruction *Instr: ChainToBase) {
- // Only GEP's and casts are suported as we need to be careful to not
+ // Only GEP's and casts are supported as we need to be careful to not
// introduce any new uses of pointers not in the liveset.
// Note that it's fine to introduce new uses of pointers which were
// otherwise not used after this statepoint.
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 7a6fa1711411d..a738ebb4607e4 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -963,7 +963,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
} else {
// X or -1 = -1
if (ConstantInt *CI = NonOverdefVal->getConstantInt())
- if (CI->isAllOnesValue())
+ if (CI->isMinusOne())
return markConstant(IV, &I, NonOverdefVal->getConstant());
}
}
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 4729f4ef59567..b9cee5b2ba956 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1673,8 +1673,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
// See if we need inttoptr for this type pair. A cast involving both scalars
// and vectors requires an additional bitcast.
- if (OldTy->getScalarType()->isIntegerTy() &&
- NewTy->getScalarType()->isPointerTy()) {
+ if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
// Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
if (OldTy->isVectorTy() && !NewTy->isVectorTy())
return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
@@ -1690,8 +1689,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
// See if we need ptrtoint for this type pair. A cast involving both scalars
// and vectors requires an additional bitcast.
- if (OldTy->getScalarType()->isPointerTy() &&
- NewTy->getScalarType()->isIntegerTy()) {
+ if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
// Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
if (OldTy->isVectorTy() && !NewTy->isVectorTy())
return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
@@ -2400,7 +2398,7 @@ private:
LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
LI.isVolatile(), LI.getName());
if (LI.isVolatile())
- NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
// Any !nonnull metadata or !range metadata on the old load is also valid
// on the new load. This is even true in some cases even when the loads
@@ -2435,7 +2433,7 @@ private:
getSliceAlign(TargetTy),
LI.isVolatile(), LI.getName());
if (LI.isVolatile())
- NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
V = NewLI;
IsPtrAdjusted = true;
@@ -2578,7 +2576,7 @@ private:
}
NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
if (SI.isVolatile())
- NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope());
+ NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
Pass.DeadInsts.insert(&SI);
deleteIfTriviallyDead(OldOp);
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 486f3e5a43d49..0cccb415efdb1 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -329,7 +329,7 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
Loops[Exit] = N->getEntry();
} else {
- // Test for sucessors as back edge
+ // Test for successors as back edge
BasicBlock *BB = N->getNodeAs<BasicBlock>();
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 314c990293cc5..7e75e88477852 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -46,13 +46,21 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
-
+ Module *TheModule = F ? F->getParent() : nullptr;
+
// Loop over all instructions, and copy them over.
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
- if (DIFinder && F->getParent() && II->getDebugLoc())
- DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get());
+ if (DIFinder && TheModule) {
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(II))
+ DIFinder->processDeclare(*TheModule, DDI);
+ else if (auto *DVI = dyn_cast<DbgValueInst>(II))
+ DIFinder->processValue(*TheModule, DVI);
+
+ if (auto DbgLoc = II->getDebugLoc())
+ DIFinder->processLocation(*TheModule, DbgLoc.get());
+ }
Instruction *NewInst = II->clone();
if (II->hasName())
@@ -153,6 +161,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// When we remap instructions, we want to avoid duplicating inlined
// DISubprograms, so record all subprograms we find as we duplicate
// instructions and then freeze them in the MD map.
+ // We also record information about dbg.value and dbg.declare to avoid
+ // duplicating the types.
DebugInfoFinder DIFinder;
// Loop over all of the basic blocks in the function, cloning them as
@@ -193,6 +203,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
}
}
+ for (auto *Type : DIFinder.types()) {
+ VMap.MD()[Type].reset(Type);
+ }
+
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
for (Function::iterator BB =
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
index 9f4d9c7e39810..d9294c4993091 100644
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -81,7 +81,7 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
break;
case ICmpInst::ICMP_SGT:
// X > -1 is equivalent to (X & SignMask) == 0.
- if (!C->isAllOnesValue())
+ if (!C->isMinusOne())
return false;
Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
Pred = ICmpInst::ICMP_EQ;
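Worked out for i8 (illustrative): X > -1 holds exactly when the sign bit is clear, so the compare folds to a sign-mask test.

    #include <cstdint>
    bool sgtMinusOne(int8_t x) { return x > -1; }  // signed compare
    bool signMaskForm(int8_t x) {
      return (uint8_t(x) & 0x80) == 0;  // agrees for all 256 values of x
    }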
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 30d8856cfbef1..1189714dfab10 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1116,12 +1116,6 @@ Function *CodeExtractor::extractCodeRegion() {
}
}
- //cerr << "NEW FUNCTION: " << *newFunction;
- // verifyFunction(*newFunction);
-
- // cerr << "OLD FUNCTION: " << *oldFunction;
- // verifyFunction(*oldFunction);
-
DEBUG(if (verifyFunction(*newFunction))
report_fatal_error("verifyFunction failed!"));
return newFunction;
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index c97e544e620a9..1328f2f3ec012 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -402,7 +402,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Value *Ptr = PtrArg->stripPointerCasts();
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Type *ElemTy = GV->getValueType();
- if (!Size->isAllOnesValue() &&
+ if (!Size->isMinusOne() &&
Size->getValue().getLimitedValue() >=
DL.getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index 0457294361b56..4a2be3a531767 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -513,8 +513,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
return Res;
- if (int Res =
- cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
+ if (int Res = cmpNumbers(LI->getSyncScopeID(),
+ cast<LoadInst>(R)->getSyncScopeID()))
return Res;
return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
@@ -529,7 +529,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
return Res;
- return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
+ return cmpNumbers(SI->getSyncScopeID(),
+ cast<StoreInst>(R)->getSyncScopeID());
}
if (const CmpInst *CI = dyn_cast<CmpInst>(L))
return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
@@ -584,7 +585,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
return Res;
- return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
+ return cmpNumbers(FI->getSyncScopeID(),
+ cast<FenceInst>(R)->getSyncScopeID());
}
if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
if (int Res = cmpNumbers(CXI->isVolatile(),
@@ -601,8 +603,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
cmpOrderings(CXI->getFailureOrdering(),
cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
return Res;
- return cmpNumbers(CXI->getSynchScope(),
- cast<AtomicCmpXchgInst>(R)->getSynchScope());
+ return cmpNumbers(CXI->getSyncScopeID(),
+ cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
}
if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
if (int Res = cmpNumbers(RMWI->getOperation(),
@@ -614,8 +616,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res = cmpOrderings(RMWI->getOrdering(),
cast<AtomicRMWInst>(R)->getOrdering()))
return Res;
- return cmpNumbers(RMWI->getSynchScope(),
- cast<AtomicRMWInst>(R)->getSynchScope());
+ return cmpNumbers(RMWI->getSyncScopeID(),
+ cast<AtomicRMWInst>(R)->getSyncScopeID());
}
if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
const PHINode *PNR = cast<PHINode>(R);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 5127eba3f9aea..74610613001c6 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1662,9 +1662,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {
TI->eraseFromParent();
}
-/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// removeUnreachableBlocks - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
-/// otherwise.
+/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
+/// after modifying the CFG.
bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
SmallPtrSet<BasicBlock*, 16> Reachable;
bool Changed = markAliveBlocks(F, Reachable);
@@ -2168,6 +2169,9 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
return true;
case Instruction::Call:
case Instruction::Invoke:
+ // Can't handle inline asm. Skip it.
+ if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue()))
+ return false;
// Many arithmetic intrinsics have no issue taking a
// variable, however it's hard to distinguish these from
// specials such as @llvm.frameaddress that require a constant.
@@ -2182,12 +2186,18 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
case Instruction::ShuffleVector:
// Shufflevector masks are constant.
return OpIdx != 2;
+ case Instruction::Switch:
case Instruction::ExtractValue:
- case Instruction::InsertValue:
// All operands apart from the first are constant.
return OpIdx == 0;
+ case Instruction::InsertValue:
+ // All operands apart from the first and the second are constant.
+ return OpIdx < 2;
case Instruction::Alloca:
- return false;
+ // Static allocas (constant size in the entry block) are handled by
+ // prologue/epilogue insertion so they're free anyway. We definitely don't
+ // want to make them non-constant.
+ return !cast<AllocaInst>(I)->isStaticAlloca();
case Instruction::GetElementPtr:
if (OpIdx == 0)
return true;
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 9ad2b707e6b23..5170c68e2915a 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -65,9 +65,11 @@ static cl::opt<bool> UnrollRuntimeMultiExit(
/// than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
- BasicBlock *PrologExit, BasicBlock *PreHeader,
- BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
- DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *PrologExit,
+ BasicBlock *OriginalLoopLatchExit,
+ BasicBlock *PreHeader, BasicBlock *NewPreHeader,
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
@@ -142,17 +144,15 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// then (BECount + 1) cannot unsigned-overflow.
Value *BrLoopExit =
B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
- BasicBlock *Exit = L->getUniqueExitBlock();
- assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
- SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
- SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
+ SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
+ SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
- B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
+ B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
InsertPt->eraseFromParent();
if (DT)
- DT->changeImmediateDominator(Exit, PrologExit);
+ DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
}
/// Connect the unrolling epilog code to the original loop.
@@ -427,6 +427,50 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
return nullptr;
}
+/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
+/// is populated with all the loop exit blocks other than the LatchExit block.
+static bool
+canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
+ BasicBlock *LatchExit, bool PreserveLCSSA,
+ bool UseEpilogRemainder) {
+
+ // Support runtime unrolling for multiple exit blocks and multiple exiting
+ // blocks.
+ if (!UnrollRuntimeMultiExit)
+ return false;
+ // Even if runtime multi-exit unrolling is enabled, we currently have some
+ // correctness constraints on unrolling a multi-exit loop.
+ // We rely on LCSSA form being preserved when the exit blocks are transformed.
+ if (!PreserveLCSSA)
+ return false;
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueExitBlocks(Exits);
+ for (auto *BB : Exits)
+ if (BB != LatchExit)
+ OtherExits.push_back(BB);
+
+ // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
+ // UnrollRuntimeMultiExit is true. This will need updating the logic in
+ // connectEpilog/connectProlog.
+ if (!LatchExit->getSinglePredecessor()) {
+ DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
+ "predecessor.\n");
+ return false;
+ }
+ // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
+ // and L is an inner loop. This is because in the presence of multiple exits, the
+ // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
+ // outer loop. This is automatically handled in the prolog case, so we do not
+ // have that bug in prolog generation.
+ if (UseEpilogRemainder && L->getParentLoop())
+ return false;
+
+ // All constraints have been satisfied.
+ return true;
+}
+
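For reference, a hedged sketch of the loop shape this helper now admits when UnrollRuntimeMultiExit is on: one latch exit plus one early exit, each to its own exit block, with LCSSA preserved.

    int find(const int *a, int n, int key) {
      int i = 0;
      for (; i < n; ++i)  // latch exit: taken when i >= n
        if (a[i] == key)
          break;          // second exiting block, with its own exit block
      return i;
    }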
/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
@@ -470,53 +514,40 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool UseEpilogRemainder,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, bool PreserveLCSSA) {
- // for now, only unroll loops that contain a single exit
- if (!UnrollRuntimeMultiExit && !L->getExitingBlock())
- return false;
+ DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
+ DEBUG(L->dump());
// Make sure the loop is in canonical form.
- if (!L->isLoopSimplifyForm())
+ if (!L->isLoopSimplifyForm()) {
+ DEBUG(dbgs() << "Not in simplify form!\n");
return false;
+ }
// Guaranteed by LoopSimplifyForm.
BasicBlock *Latch = L->getLoopLatch();
BasicBlock *Header = L->getHeader();
- BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop
- if (!LatchExit && !UnrollRuntimeMultiExit)
- return false;
- // These are exit blocks other than the target of the latch exiting block.
- SmallVector<BasicBlock *, 4> OtherExits;
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
+ unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
+ BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
// Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
// targets of the Latch be an exit block out of the loop. This needs
// to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
- assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) &&
+ assert(!L->contains(LatchExit) &&
"one of the loop latch successors should be the exit block!");
- // Support runtime unrolling for multiple exit blocks and multiple exiting
- // blocks.
- if (!LatchExit) {
- assert(UseEpilogRemainder && "Multi exit unrolling is currently supported "
- "unrolling with epilog remainder only!");
- LatchExit = LatchBR->getSuccessor(ExitIndex);
- // We rely on LCSSA form being preserved when the exit blocks are
- // transformed.
- if (!PreserveLCSSA)
- return false;
- // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This
- // will need updating the logic in connectEpilog.
- if (!LatchExit->getSinglePredecessor())
- return false;
- SmallVector<BasicBlock *, 4> Exits;
- L->getUniqueExitBlocks(Exits);
- for (auto *BB : Exits)
- if (BB != LatchExit)
- OtherExits.push_back(BB);
+ // These are exit blocks other than the target of the latch exiting block.
+ SmallVector<BasicBlock *, 4> OtherExits;
+ bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(
+ L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder);
+ // Support only a single exit and a single exiting block unless multi-exit
+ // loop unrolling is enabled.
+ if (!isMultiExitUnrollingEnabled &&
+ (!L->getExitingBlock() || OtherExits.size())) {
+ DEBUG(
+ dbgs()
+ << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
+ "enabled!\n");
+ return false;
}
-
- assert(LatchExit && "Latch Exit should exist!");
-
// Use Scalar Evolution to compute the trip count. This allows more loops to
// be unrolled than relying on induction var simplification.
if (!SE)
@@ -530,29 +561,38 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// exiting blocks).
const SCEV *BECountSC = SE->getExitCount(L, Latch);
if (isa<SCEVCouldNotCompute>(BECountSC) ||
- !BECountSC->getType()->isIntegerTy())
+ !BECountSC->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << "Could not compute exit block SCEV\n");
return false;
+ }
unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
// Add 1 since the backedge count doesn't include the first loop iteration.
const SCEV *TripCountSC =
SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
- if (isa<SCEVCouldNotCompute>(TripCountSC))
+ if (isa<SCEVCouldNotCompute>(TripCountSC)) {
+ DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
return false;
+ }
BasicBlock *PreHeader = L->getLoopPreheader();
BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
const DataLayout &DL = Header->getModule()->getDataLayout();
SCEVExpander Expander(*SE, DL, "loop-unroll");
if (!AllowExpensiveTripCount &&
- Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
+ Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
+ DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
return false;
+ }
// This constraint lets us deal with an overflowing trip count easily; see the
// comment on ModVal below.
- if (Log2_32(Count) > BEWidth)
+ if (Log2_32(Count) > BEWidth) {
+ DEBUG(dbgs()
+ << "Count failed constraint on overflow trip count calculation.\n");
return false;
+ }
// Loop structure is the following:
//
@@ -711,11 +751,10 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// node.
for (unsigned i =0; i < oldNumOperands; i++){
Value *newVal = VMap[Phi->getIncomingValue(i)];
- if (!newVal) {
- assert(isa<Constant>(Phi->getIncomingValue(i)) &&
- "VMap should exist for all values except constants!");
+ // newVal can be a constant or derived from values outside the loop, and
+ // hence need not have a VMap value.
+ if (!newVal)
newVal = Phi->getIncomingValue(i);
- }
Phi->addIncoming(newVal,
cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
}
@@ -781,8 +820,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
} else {
// Connect the prolog code to the original loop and update the
// PHI functions.
- ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
- VMap, DT, LI, PreserveLCSSA);
+ ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA);
}
// If this loop is nested, then the loop unroller changes the code in the
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 1c2a60a6b8b24..900450b400612 100644
--- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -8,12 +8,256 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
+static unsigned getLoopOperandSizeInBytes(Type *Type) {
+ if (VectorType *VTy = dyn_cast<VectorType>(Type)) {
+ return VTy->getBitWidth() / 8;
+ }
+
+ return Type->getPrimitiveSizeInBits() / 8;
+}
+
+void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
+ Value *DstAddr, ConstantInt *CopyLen,
+ unsigned SrcAlign, unsigned DestAlign,
+ bool SrcIsVolatile, bool DstIsVolatile,
+ const TargetTransformInfo &TTI) {
+ // No need to expand zero length copies.
+ if (CopyLen->isZero())
+ return;
+
+ BasicBlock *PreLoopBB = InsertBefore->getParent();
+ BasicBlock *PostLoopBB = nullptr;
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+
+ Type *TypeOfCopyLen = CopyLen->getType();
+ Type *LoopOpType =
+ TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
+
+ unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
+ uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
+
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+
+ if (LoopEndCount != 0) {
+ // Split
+ PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
+ PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
+
+ IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
+
+ // Cast the Src and Dst pointers to pointers to the loop operand type (if
+ // needed).
+ PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
+ PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
+ if (SrcAddr->getType() != SrcOpType) {
+ SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
+ }
+ if (DstAddr->getType() != DstOpType) {
+ DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
+ }
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
+ // Loop Body
+ Value *SrcGEP =
+ LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
+ Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *DstGEP =
+ LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
+ LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ // Create the loop branch condition.
+ Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
+ LoopBB, PostLoopBB);
+ }
+
+ uint64_t BytesCopied = LoopEndCount * LoopOpSize;
+ uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
+ if (RemainingBytes) {
+ IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
+ : InsertBefore);
+
+ // Update the alignment based on the copy size used in the loop body.
+ SrcAlign = std::min(SrcAlign, LoopOpSize);
+ DestAlign = std::min(DestAlign, LoopOpSize);
+
+ SmallVector<Type *, 5> RemainingOps;
+ TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
+ SrcAlign, DestAlign);
+
+ for (auto OpTy : RemainingOps) {
+ // Calculate the new index
+ unsigned OperandSize = getLoopOperandSizeInBytes(OpTy);
+ uint64_t GepIndex = BytesCopied / OperandSize;
+ assert(GepIndex * OperandSize == BytesCopied &&
+ "Division should have no Remainder!");
+ // Cast source to operand type and load
+ PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
+ Value *CastedSrc = SrcAddr->getType() == SrcPtrType
+ ? SrcAddr
+ : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
+ Value *SrcGEP = RBuilder.CreateInBoundsGEP(
+ OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+
+ // Cast destination to operand type and store.
+ PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
+ Value *CastedDst = DstAddr->getType() == DstPtrType
+ ? DstAddr
+ : RBuilder.CreateBitCast(DstAddr, DstPtrType);
+ Value *DstGEP = RBuilder.CreateInBoundsGEP(
+ OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ RBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ BytesCopied += OperandSize;
+ }
+ }
+ assert(BytesCopied == CopyLen->getZExtValue() &&
+ "Bytes copied should match size in the call!");
+}
+
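In C-like terms (an approximation of the emitted IR, with WideTy standing in for the target-chosen LoopOpType), the known-size expansion above is a wide copy loop followed by straight-line residual copies; the real residual uses the progressively narrower types from getMemcpyLoopResidualLoweringType rather than a byte loop.

    void memcpy_known(char *dst, const char *src, size_t len /* constant */) {
      typedef unsigned long long WideTy;   // stands in for LoopOpType
      size_t wide = len / sizeof(WideTy);  // LoopEndCount
      for (size_t i = 0; i < wide; ++i)    // "load-store-loop"
        ((WideTy *)dst)[i] = ((const WideTy *)src)[i];
      for (size_t b = wide * sizeof(WideTy); b < len; ++b)
        dst[b] = src[b];                   // residual, fully unrolled in IR
    }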
+void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
+ Value *SrcAddr, Value *DstAddr,
+ Value *CopyLen, unsigned SrcAlign,
+ unsigned DestAlign, bool SrcIsVolatile,
+ bool DstIsVolatile,
+ const TargetTransformInfo &TTI) {
+ BasicBlock *PreLoopBB = InsertBefore->getParent();
+ BasicBlock *PostLoopBB =
+ PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
+
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+
+ Type *LoopOpType =
+ TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
+ unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
+
+ IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
+
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
+ PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
+ if (SrcAddr->getType() != SrcOpType) {
+ SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
+ }
+ if (DstAddr->getType() != DstOpType) {
+ DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
+ }
+
+ // Calculate the loop trip count, and remaining bytes to copy after the loop.
+ Type *CopyLenType = CopyLen->getType();
+ IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
+ assert(ILengthType &&
+ "expected size argument to memcpy to be an integer type!");
+ ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
+ Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
+ Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+ Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr);
+ IRBuilder<> LoopBuilder(LoopBB);
+
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
+ LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
+
+ Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
+ Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
+ LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ Type *Int8Type = Type::getInt8Ty(Ctx);
+ if (LoopOpType != Int8Type) {
+ // Loop body for the residual copy.
+ BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
+ PreLoopBB->getParent(), nullptr);
+ // Residual loop header.
+ BasicBlock *ResHeaderBB = BasicBlock::Create(
+ Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
+
+ // Need to update the pre-loop basic block to branch to the correct place:
+ // branch to the main loop if the count is non-zero, branch to the residual
+ // loop if the copy size is smaller than 1 iteration of the main loop but
+ // non-zero, and finally branch to after the residual loop if the memcpy
+ // size is zero.
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+ PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+ LoopBB, ResHeaderBB);
+ PreLoopBB->getTerminator()->eraseFromParent();
+
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+ ResHeaderBB);
+
+ // Determine if we need to branch to the residual loop or bypass it.
+ IRBuilder<> RHBuilder(ResHeaderBB);
+ RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
+ ResLoopBB, PostLoopBB);
+
+ // Copy the residual with single byte load/store loop.
+ IRBuilder<> ResBuilder(ResLoopBB);
+ PHINode *ResidualIndex =
+ ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
+ ResidualIndex->addIncoming(Zero, ResHeaderBB);
+
+ Value *SrcAsInt8 =
+ ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS));
+ Value *DstAsInt8 =
+ ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS));
+ Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
+ Value *SrcGEP =
+ ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
+ Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *DstGEP =
+ ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
+ ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ Value *ResNewIndex =
+ ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
+ ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
+
+ // Create the loop branch condition.
+ ResBuilder.CreateCondBr(
+ ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
+ PostLoopBB);
+ } else {
+ // In this case the loop operand type was a byte, and there is no need for a
+ // residual loop to copy the remaining memory after the main loop.
+ // We do however need to patch up the control flow by creating the
+ // terminators for the preloop block and the memcpy loop.
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+ PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+ LoopBB, PostLoopBB);
+ PreLoopBB->getTerminator()->eraseFromParent();
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+ PostLoopBB);
+ }
+}
+
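For an unknown size the same structure is built with run-time trip counts and a byte-wise residual loop; roughly (same hedged C-like approximation):

    void memcpy_unknown(char *dst, const char *src, size_t len) {
      typedef unsigned long long WideTy;       // LoopOpType
      size_t count = len / sizeof(WideTy);     // RuntimeLoopCount
      size_t residual = len % sizeof(WideTy);  // RuntimeResidual
      size_t copied = len - residual;          // RuntimeBytesCopied
      for (size_t i = 0; i < count; ++i)       // "loop-memcpy-expansion"
        ((WideTy *)dst)[i] = ((const WideTy *)src)[i];
      for (size_t i = 0; i < residual; ++i)    // "loop-memcpy-residual"
        dst[copied + i] = src[copied + i];
    }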
void llvm::createMemCpyLoop(Instruction *InsertBefore,
Value *SrcAddr, Value *DstAddr, Value *CopyLen,
unsigned SrcAlign, unsigned DestAlign,
@@ -208,15 +452,41 @@ static void createMemSetLoop(Instruction *InsertBefore,
NewBB);
}
-void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy) {
- createMemCpyLoop(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcAlign */ Memcpy->getAlignment(),
- /* DestAlign */ Memcpy->getAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile());
+void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
+ const TargetTransformInfo &TTI) {
+ // Original implementation
+ if (!TTI.useWideIRMemcpyLoopLowering()) {
+ createMemCpyLoop(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile());
+ } else {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+ createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
+ } else {
+ createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
+ }
+ }
}
void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index e724b0a28c322..dee658f983932 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5754,8 +5754,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BasicBlock *Dom = BB->getSinglePredecessor()) {
auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
if (PBI && PBI->isConditional() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
- (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) {
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);
bool CondIsFalse = PBI->getSuccessor(1) == BB;
Optional<bool> Implication = isImpliedCondition(
PBI->getCondition(), BI->getCondition(), DL, CondIsFalse);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index ec8b0d426265a..6d90e6b48358a 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -80,6 +81,7 @@ namespace {
bool IsSigned);
bool eliminateSDiv(BinaryOperator *SDiv);
bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
+ bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
};
}
@@ -154,6 +156,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
+ ICmpInst::Predicate OriginalPred = Pred;
if (IVOperand != ICmp->getOperand(0)) {
// Swapped
assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
@@ -262,6 +265,16 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
ICmp->setPredicate(InvariantPredicate);
ICmp->setOperand(0, NewLHS);
ICmp->setOperand(1, NewRHS);
+ } else if (ICmpInst::isSigned(OriginalPred) &&
+ SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
+ // If none of the above worked, all we can do is canonicalize the
+ // comparison in the hope that it opens the door to other optimizations.
+ // If we find that we are comparing two non-negative values, we switch
+ // the instruction's predicate to its unsigned version. Note that we
+ // cannot rely on Pred here unless we check whether it was swapped.
+ assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
+ DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n');
+ ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
} else
return;
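A minimal before/after sketch of this canonicalization, assuming SCEV proves both %iv and %n non-negative (hypothetical names):

    ; before
    %cmp = icmp slt i32 %iv, %n
    ; after: for non-negative operands the signed and unsigned
    ; orderings agree, so the predicate is rewritten
    %cmp = icmp ult i32 %iv, %n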
@@ -583,6 +596,35 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
return Changed;
}
+/// Annotate the Shr in (X << IVOperand) >> C as exact using the
+/// information from the IV's range. Returns true if anything changed, false
+/// otherwise.
+bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
+ Value *IVOperand) {
+ using namespace llvm::PatternMatch;
+
+ if (BO->getOpcode() == Instruction::Shl) {
+ bool Changed = false;
+ ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
+ for (auto *U : BO->users()) {
+ const APInt *C;
+ if (match(U,
+ m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
+ match(U,
+ m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
+ BinaryOperator *Shr = cast<BinaryOperator>(U);
+ if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
+ Shr->setIsExact(true);
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+ }
+
+ return false;
+}
+
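For illustration, a hypothetical case this catches, assuming SCEV's unsigned range for %iv has a minimum of at least 2:

    %shl = shl i32 %x, %iv        ; the low %iv bits of %shl are zero
    %shr = lshr i32 %shl, 2       ; shifts out only those known-zero bits
    ; since the shift amount 2 is no larger than %iv's unsigned minimum,
    ; no set bits are discarded and the shift can be annotated:
    %shr = lshr exact i32 %shl, 2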
/// Add all uses of Def to the current IV's worklist.
static void pushIVUsers(
Instruction *Def,
@@ -675,8 +717,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
}
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) {
- if (isa<OverflowingBinaryOperator>(BO) &&
- strengthenOverflowingOperation(BO, IVOperand)) {
+ if ((isa<OverflowingBinaryOperator>(BO) &&
+ strengthenOverflowingOperation(BO, IVOperand)) ||
+ (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
// re-queue uses of the now modified binary operator and fall
// through to the checks that remain.
pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b723b65f35e59..77c0a41929ac7 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -656,7 +656,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
// memchr(x, y, 0) -> null
- if (LenC && LenC->isNullValue())
+ if (LenC && LenC->isZero())
return Constant::getNullValue(CI->getType());
// From now on we need at least constant length and string.
@@ -2280,7 +2280,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
return true;
if (ConstantInt *ObjSizeCI =
dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
- if (ObjSizeCI->isAllOnesValue())
+ if (ObjSizeCI->isMinusOne())
return true;
// If the object size wasn't -1 (unknown), bail out if we were asked to.
if (OnlyLowerUnknownSize)
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index 60d9ede2c4871..c3feea6a0a414 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -51,25 +51,24 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
// If the store and reload are the same size, we can always reuse it.
if (StoredValSize == LoadedValSize) {
// Pointer to Pointer -> use bitcast.
- if (StoredValTy->getScalarType()->isPointerTy() &&
- LoadedTy->getScalarType()->isPointerTy()) {
+ if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
} else {
// Convert source pointers to integers, which can be bitcast.
- if (StoredValTy->getScalarType()->isPointerTy()) {
+ if (StoredValTy->isPtrOrPtrVectorTy()) {
StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
}
Type *TypeToCastTo = LoadedTy;
- if (TypeToCastTo->getScalarType()->isPointerTy())
+ if (TypeToCastTo->isPtrOrPtrVectorTy())
TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
if (StoredValTy != TypeToCastTo)
StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);
// Cast to pointer if the load needs a pointer type.
- if (LoadedTy->getScalarType()->isPointerTy())
+ if (LoadedTy->isPtrOrPtrVectorTy())
StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
}
@@ -86,7 +85,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
"canCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
- if (StoredValTy->getScalarType()->isPointerTy()) {
+ if (StoredValTy->isPtrOrPtrVectorTy()) {
StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
}
@@ -112,7 +111,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
if (LoadedTy != NewIntTy) {
// If the result is a pointer, inttoptr.
- if (LoadedTy->getScalarType()->isPointerTy())
+ if (LoadedTy->isPtrOrPtrVectorTy())
StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
else
// Otherwise, bitcast.
@@ -316,7 +315,7 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
- if (SrcVal->getType()->getScalarType()->isPointerTy())
+ if (SrcVal->getType()->isPtrOrPtrVectorTy())
SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
if (!SrcVal->getType()->isIntegerTy())
SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 193cc4d137870..eb82ee283d449 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5315,8 +5315,13 @@ void LoopVectorizationLegality::addInductionPhi(
// Both the PHI node itself, and the "post-increment" value feeding
// back into the PHI node may have external users.
- AllowedExit.insert(Phi);
- AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
+ // We can allow those uses, unless the SCEVs we have for them rely on
+ // predicates that only hold within the loop, since allowing the exit
+ // currently means reusing this SCEV outside the loop.
+ if (PSE.getUnionPredicate().isAlwaysTrue()) {
+ AllowedExit.insert(Phi);
+ AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
+ }
DEBUG(dbgs() << "LV: Found an induction variable.\n");
return;
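A hedged sketch of the situation this guards, with hypothetical IR: the post-increment value escapes the loop, which used to be unconditionally allowed; if PSE had to assume a runtime predicate (for example a no-overflow check) to analyze the induction, the SCEV for %iv.next is only valid under that predicate, so the external use can no longer be permitted:

    loop:
      %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
      %iv.next = add i32 %iv, 1
      %exitcond = icmp eq i32 %iv.next, %n
      br i1 %exitcond, label %exit, label %loop
    exit:
      %lcssa = phi i32 [ %iv.next, %loop ]  ; external user of %iv.next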
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b494526369d6a..4425043ad39a0 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -860,7 +860,7 @@ private:
bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP);
/// Un-bundles a group of instructions.
- void cancelScheduling(ArrayRef<Value *> VL);
+ void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
/// Extends the scheduling region so that V is inside the region.
/// \returns true if the region size is within the limit.
@@ -1258,7 +1258,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i)));
if (Term) {
DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1284,7 +1284,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (Reuse) {
DEBUG(dbgs() << "SLP: Reusing extract sequence.\n");
} else {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
}
newTreeEntry(VL, Reuse, UserTreeIdx);
return;
@@ -1301,7 +1301,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy)) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
return;
@@ -1312,7 +1312,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
LoadInst *L = cast<LoadInst>(VL[i]);
if (!L->isSimple()) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
@@ -1349,7 +1349,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
break;
}
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
if (ReverseConsecutive) {
@@ -1376,7 +1376,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned i = 0; i < VL.size(); ++i) {
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
return;
@@ -1404,7 +1404,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
CmpInst *Cmp = cast<CmpInst>(VL[i]);
if (Cmp->getPredicate() != P0 ||
Cmp->getOperand(0)->getType() != ComparedTy) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
return;
@@ -1471,7 +1471,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned j = 0; j < VL.size(); ++j) {
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1484,7 +1484,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
if (Ty0 != CurTy) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1496,7 +1496,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (!isa<ConstantInt>(Op)) {
DEBUG(
dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1518,7 +1518,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
@@ -1541,7 +1541,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// represented by an intrinsic call
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (!isTriviallyVectorizable(ID)) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
@@ -1555,7 +1555,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (!CI2 || CI2->getCalledFunction() != Int ||
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
<< "\n");
@@ -1566,7 +1566,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (hasVectorInstrinsicScalarOpd(ID, 1)) {
Value *A1J = CI2->getArgOperand(1);
if (A1I != A1J) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
<< " argument "<< A1I<<"!=" << A1J
@@ -1579,7 +1579,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
!std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(),
CI->op_begin() + CI->getBundleOperandsEndIndex(),
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!="
<< *VL[i] << '\n');
@@ -1603,7 +1603,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// If this is not an alternate sequence of opcode like add-sub
// then do not vectorize this instruction.
if (!isAltShuffle) {
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
@@ -1631,7 +1631,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
default:
- BS.cancelScheduling(VL);
+ BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return;
@@ -3177,17 +3177,18 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
}
}
if (!Bundle->isReady()) {
- cancelScheduling(VL);
+ cancelScheduling(VL, VL[0]);
return false;
}
return true;
}
-void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
- if (isa<PHINode>(VL[0]))
+void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
+ Value *OpValue) {
+ if (isa<PHINode>(OpValue))
return;
- ScheduleData *Bundle = getScheduleData(VL[0]);
+ ScheduleData *Bundle = getScheduleData(OpValue);
DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n");
assert(!Bundle->IsScheduled &&
"Can't cancel bundle which is already scheduled");