author     Dimitry Andric <dim@FreeBSD.org>  2017-07-01 13:22:02 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-07-01 13:22:02 +0000
commit     9df3605dea17e84f8183581f6103bd0c79e2a606 (patch)
tree       70a2f36ce9eb9bb213603cd7f2f120af53fc176f /lib
parent     08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 (diff)
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/CFLAndersAliasAnalysis.cpp | 15
-rw-r--r--  lib/Analysis/CFLSteensAliasAnalysis.cpp | 29
-rw-r--r--  lib/Analysis/InlineCost.cpp | 33
-rw-r--r--  lib/Analysis/IteratedDominanceFrontier.cpp | 12
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 4
-rw-r--r--  lib/Analysis/OptimizationDiagnosticInfo.cpp | 13
-rw-r--r--  lib/Analysis/RegionInfo.cpp | 40
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 243
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 9
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp | 20
-rw-r--r--  lib/BinaryFormat/Magic.cpp | 4
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 32
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 44
-rw-r--r--  lib/Bitcode/Writer/LLVMBuild.txt | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 22
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.h | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 20
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 10
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 20
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h | 9
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 111
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp | 67
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelector.cpp | 20
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 68
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 37
-rw-r--r--  lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 24
-rw-r--r--  lib/CodeGen/GlobalISel/RegBankSelect.cpp | 57
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 68
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h | 4
-rw-r--r--  lib/CodeGen/MIRParser/MIParser.cpp | 6
-rw-r--r--  lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 10
-rw-r--r--  lib/CodeGen/MacroFusion.cpp | 27
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 170
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 2
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 9
-rw-r--r--  lib/CodeGen/RenameIndependentSubregs.cpp | 6
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 118
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 21
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 34
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp | 36
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 11
-rw-r--r--  lib/DebugInfo/CodeView/CVSymbolVisitor.cpp | 28
-rw-r--r--  lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp | 16
-rw-r--r--  lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp | 6
-rw-r--r--  lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp | 14
-rw-r--r--  lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp | 14
-rw-r--r--  lib/DebugInfo/CodeView/DebugLinesSubsection.cpp | 13
-rw-r--r--  lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp | 11
-rw-r--r--  lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp | 14
-rw-r--r--  lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp | 7
-rw-r--r--  lib/DebugInfo/CodeView/EnumTables.cpp | 24
-rw-r--r--  lib/DebugInfo/CodeView/Formatters.cpp | 8
-rw-r--r--  lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp | 18
-rw-r--r--  lib/DebugInfo/CodeView/StringsAndChecksums.cpp | 8
-rw-r--r--  lib/DebugInfo/CodeView/SymbolSerializer.cpp | 11
-rw-r--r--  lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp | 19
-rw-r--r--  lib/DebugInfo/CodeView/TypeSerializer.cpp | 36
-rw-r--r--  lib/DebugInfo/DWARF/CMakeLists.txt | 1
-rw-r--r--  lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 42
-rw-r--r--  lib/DebugInfo/DWARF/DWARFContext.cpp | 87
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDataExtractor.cpp | 24
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 4
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 22
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 8
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp | 11
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDie.cpp | 2
-rw-r--r--  lib/DebugInfo/DWARF/DWARFFormValue.cpp | 20
-rw-r--r--  lib/DebugInfo/DWARF/DWARFUnit.cpp | 42
-rw-r--r--  lib/DebugInfo/DWARF/DWARFVerifier.cpp | 73
-rw-r--r--  lib/DebugInfo/PDB/Native/DbiModuleList.cpp | 11
-rw-r--r--  lib/DebugInfo/PDB/Native/Hash.cpp | 2
-rw-r--r--  lib/DebugInfo/PDB/Native/HashTable.cpp | 16
-rw-r--r--  lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp | 6
-rw-r--r--  lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp | 0
-rw-r--r--  lib/DebugInfo/PDB/Native/NamedStreamMap.cpp | 9
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeEnumModules.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp | 29
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeSession.cpp | 21
-rw-r--r--  lib/DebugInfo/PDB/PDB.cpp | 12
-rw-r--r--  lib/DebugInfo/PDB/PDBExtras.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/UDTLayout.cpp | 18
-rw-r--r--  lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 10
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 4
-rw-r--r--  lib/IR/Constants.cpp | 2
-rw-r--r--  lib/IR/Dominators.cpp | 18
-rw-r--r--  lib/IR/LLVMContext.cpp | 15
-rw-r--r--  lib/IR/LLVMContextImpl.h | 3
-rw-r--r--  lib/LTO/LTO.cpp | 38
-rw-r--r--  lib/MC/MCAssembler.cpp | 6
-rw-r--r--  lib/MC/WasmObjectWriter.cpp | 24
-rw-r--r--  lib/Object/CMakeLists.txt | 1
-rw-r--r--  lib/Object/COFFObjectFile.cpp | 25
-rw-r--r--  lib/Object/IRSymtab.cpp | 48
-rw-r--r--  lib/Object/WasmObjectFile.cpp | 16
-rw-r--r--  lib/Object/WindowsResource.cpp | 2
-rw-r--r--  lib/ObjectYAML/COFFYAML.cpp | 62
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLDebugSections.cpp | 53
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 42
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLTypes.cpp | 73
-rw-r--r--  lib/ObjectYAML/DWARFEmitter.cpp | 40
-rw-r--r--  lib/ObjectYAML/DWARFYAML.cpp | 4
-rw-r--r--  lib/ObjectYAML/ELFYAML.cpp | 18
-rw-r--r--  lib/ObjectYAML/MachOYAML.cpp | 50
-rw-r--r--  lib/ObjectYAML/ObjectYAML.cpp | 9
-rw-r--r--  lib/ObjectYAML/WasmYAML.cpp | 10
-rw-r--r--  lib/ObjectYAML/YAML.cpp | 5
-rw-r--r--  lib/Passes/PassBuilder.cpp | 43
-rw-r--r--  lib/ProfileData/Coverage/CoverageMapping.cpp | 54
-rw-r--r--  lib/ProfileData/InstrProf.cpp | 20
-rw-r--r--  lib/Support/AMDGPUCodeObjectMetadata.cpp | 2
-rw-r--r--  lib/Support/Host.cpp | 10
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 13
-rw-r--r--  lib/Support/TargetParser.cpp | 36
-rw-r--r--  lib/Support/Unix/Path.inc | 12
-rw-r--r--  lib/Support/Unix/Process.inc | 12
-rw-r--r--  lib/Target/AArch64/AArch64CondBrTuning.cpp | 7
-rw-r--r--  lib/Target/AArch64/AArch64ConditionalCompares.cpp | 48
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 5
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 4
-rw-r--r--  lib/Target/AArch64/AArch64InstructionSelector.cpp | 9
-rw-r--r--  lib/Target/AArch64/AArch64LegalizerInfo.cpp | 9
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.cpp | 15
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.h | 2
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedThunderX2T99.td | 1221
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 1
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.h | 3
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 77
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.h | 6
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 32
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 123
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 4
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h | 38
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 5
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 18
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 2
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp | 65
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp | 37
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h | 43
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/CMakeLists.txt | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPU.h | 1
-rw-r--r--  lib/Target/AMDGPU/AMDGPU.td | 6
-rw-r--r--  lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp | 4
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.h | 6
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 3
-rw-r--r--  lib/Target/AMDGPU/CMakeLists.txt | 1
-rw-r--r--  lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 4
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 60
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.cpp | 18
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.td | 4
-rw-r--r--  lib/Target/AMDGPU/SIInstructions.td | 8
-rw-r--r--  lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 44
-rw-r--r--  lib/Target/AMDGPU/SITypeRewriter.cpp | 156
-rw-r--r--  lib/Target/ARM/ARM.td | 32
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 45
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 15
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 8
-rw-r--r--  lib/Target/ARM/ARMInstructionSelector.cpp | 50
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.cpp | 43
-rw-r--r--  lib/Target/ARM/ARMRegisterBankInfo.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 4
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 1
-rw-r--r--  lib/Target/ARM/ARMScheduleM3.td | 21
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 77
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 6
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 138
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 3
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp | 34
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.h | 2
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 17
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 6
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h | 83
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp | 47
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h | 15
-rw-r--r--  lib/Target/BPF/BPFISelDAGToDAG.cpp | 312
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 181
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.h | 17
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 77
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 18
-rw-r--r--  lib/Target/Hexagon/HexagonNewValueJump.cpp | 63
-rw-r--r--  lib/Target/Hexagon/HexagonOptAddrMode.cpp | 10
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 9
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.h | 3
-rw-r--r--  lib/Target/Hexagon/HexagonTargetTransformInfo.cpp | 17
-rw-r--r--  lib/Target/Hexagon/HexagonTargetTransformInfo.h | 8
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 15
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 88
-rw-r--r--  lib/Target/Mips/MicroMips64r6InstrInfo.td | 12
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 12
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 135
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 99
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 3
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 176
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetTransformInfo.h | 3
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 11
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 34
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPC.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 108
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 9
-rw-r--r--  lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 24
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.h | 3
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 12
-rw-r--r--  lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 16
-rw-r--r--  lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp | 6
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 7
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 1
-rw-r--r--  lib/Target/SystemZ/README.txt | 5
-rw-r--r--  lib/Target/SystemZ/SystemZ.td | 1
-rw-r--r--  lib/Target/SystemZ/SystemZFeatures.td | 22
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFormats.td | 106
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 86
-rw-r--r--  lib/Target/SystemZ/SystemZInstrSystem.td | 517
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 10
-rw-r--r--  lib/Target/SystemZ/SystemZScheduleZ13.td | 193
-rw-r--r--  lib/Target/SystemZ/SystemZScheduleZ196.td | 190
-rw-r--r--  lib/Target/SystemZ/SystemZScheduleZEC12.td | 191
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.cpp | 5
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.h | 15
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.h | 3
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyInstrControl.td | 26
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp | 2
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 112
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 20
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp | 2
-rw-r--r--  lib/Target/X86/X86.td | 30
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 106
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 645
-rw-r--r--  lib/Target/X86/X86InstructionSelector.cpp | 53
-rw-r--r--  lib/Target/X86/X86LegalizerInfo.cpp | 20
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 55
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 2
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 47
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 1
-rw-r--r--  lib/Transforms/Coroutines/CoroInstr.h | 44
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 62
-rw-r--r--  lib/Transforms/IPO/SampleProfile.cpp | 3
-rw-r--r--  lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 21
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 49
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h | 5
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 24
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 4
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 17
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/ConstantHoisting.cpp | 95
-rw-r--r--  lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 30
-rw-r--r--  lib/Transforms/Scalar/NewGVN.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 20
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 27
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp | 130
-rw-r--r--  lib/Transforms/Utils/LoopUtils.cpp | 19
-rw-r--r--  lib/Transforms/Utils/LowerMemIntrinsics.cpp | 19
-rw-r--r--  lib/Transforms/Utils/OrderedInstructions.cpp | 3
-rw-r--r--  lib/Transforms/Utils/PredicateInfo.cpp | 79
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 10
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp | 10
-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp | 3282
-rw-r--r--  lib/Transforms/Vectorize/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 675
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 84
-rw-r--r--  lib/Transforms/Vectorize/Vectorize.cpp | 3
282 files changed, 8010 insertions, 6473 deletions
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index ddd5123d0eff..0de7ad98af46 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -68,17 +68,6 @@ CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
: AAResultBase(std::move(RHS)), TLI(RHS.TLI) {}
CFLAndersAAResult::~CFLAndersAAResult() {}
-static const Function *parentFunctionOfValue(const Value *Val) {
- if (auto *Inst = dyn_cast<Instruction>(Val)) {
- auto *Bb = Inst->getParent();
- return Bb->getParent();
- }
-
- if (auto *Arg = dyn_cast<Argument>(Val))
- return Arg->getParent();
- return nullptr;
-}
-
namespace {
enum class MatchState : uint8_t {
@@ -789,10 +778,10 @@ void CFLAndersAAResult::scan(const Function &Fn) {
// resize and invalidating the reference returned by operator[]
auto FunInfo = buildInfoFrom(Fn);
Cache[&Fn] = std::move(FunInfo);
- Handles.push_front(FunctionHandle(const_cast<Function *>(&Fn), this));
+ Handles.emplace_front(const_cast<Function *>(&Fn), this);
}
-void CFLAndersAAResult::evict(const Function &Fn) { Cache.erase(&Fn); }
+void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); }
const Optional<CFLAndersAAResult::FunctionInfo> &
CFLAndersAAResult::ensureCached(const Function &Fn) {
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 6e4263920e58..adbdd82012a3 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -88,19 +88,6 @@ const StratifiedIndex StratifiedLink::SetSentinel =
//===----------------------------------------------------------------------===//
/// Determines whether it would be pointless to add the given Value to our sets.
-static bool canSkipAddingToSets(Value *Val);
-
-static Optional<Function *> parentFunctionOfValue(Value *Val) {
- if (auto *Inst = dyn_cast<Instruction>(Val)) {
- auto *Bb = Inst->getParent();
- return Bb->getParent();
- }
-
- if (auto *Arg = dyn_cast<Argument>(Val))
- return Arg->getParent();
- return None;
-}
-
static bool canSkipAddingToSets(Value *Val) {
// Constants can share instances, which may falsely unify multiple
// sets, e.g. in
@@ -245,7 +232,7 @@ void CFLSteensAAResult::scan(Function *Fn) {
auto FunInfo = buildSetsFrom(Fn);
Cache[Fn] = std::move(FunInfo);
- Handles.push_front(FunctionHandle(Fn, this));
+ Handles.emplace_front(Fn, this);
}
void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
@@ -281,9 +268,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
return NoAlias;
Function *Fn = nullptr;
- auto MaybeFnA = parentFunctionOfValue(ValA);
- auto MaybeFnB = parentFunctionOfValue(ValB);
- if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
+ Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
+ Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
+ if (!MaybeFnA && !MaybeFnB) {
// The only times this is known to happen are when globals + InlineAsm are
// involved
DEBUG(dbgs()
@@ -291,12 +278,12 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
return MayAlias;
}
- if (MaybeFnA.hasValue()) {
- Fn = *MaybeFnA;
- assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
+ if (MaybeFnA) {
+ Fn = MaybeFnA;
+ assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
"Interprocedural queries not supported");
} else {
- Fn = *MaybeFnB;
+ Fn = MaybeFnB;
}
assert(Fn != nullptr);
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 77ad6f1e166f..35693666aa03 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -66,6 +66,12 @@ static cl::opt<int>
cl::ZeroOrMore,
cl::desc("Threshold for hot callsites "));
+static cl::opt<int> ColdCallSiteRelFreq(
+ "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+ cl::desc("Maximum block frequency, expressed as a percentage of caller's "
+ "entry frequency, for a callsite to be cold in the absence of "
+ "profile information."));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -172,6 +178,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Return true if size growth is allowed when inlining the callee at CS.
bool allowSizeGrowth(CallSite CS);
+ /// Return true if \p CS is a cold callsite.
+ bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
@@ -631,6 +640,26 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
return true;
}
+bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
+ // If global profile summary is available, then callsite's coldness is
+ // determined based on that.
+ if (PSI->hasProfileSummary())
+ return PSI->isColdCallSite(CS, CallerBFI);
+ if (!CallerBFI)
+ return false;
+
+ // In the absence of global profile summary, determine if the callsite is cold
+ // relative to caller's entry. We could potentially cache the computation of
+ // scaled entry frequency, but the added complexity is not worth it unless
+ // this scaling shows up high in the profiles.
+ const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
+ auto CallSiteBB = CS.getInstruction()->getParent();
+ auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
+ auto CallerEntryFreq =
+ CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+ return CallSiteFreq < CallerEntryFreq * ColdProb;
+}
+
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
if (!allowSizeGrowth(CS)) {
@@ -676,7 +705,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
if (PSI->isHotCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Hot callsite.\n");
Threshold = Params.HotCallSiteThreshold.getValue();
- } else if (PSI->isColdCallSite(CS, CallerBFI)) {
+ } else if (isColdCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Cold callsite.\n");
Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
}
@@ -1010,7 +1039,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
if (isa<ConstantInt>(V))
return true;
- // Assume the most general case where the swith is lowered into
+ // Assume the most general case where the switch is lowered into
// either a jump table, bit test, or a balanced binary tree consisting of
// case clusters without merging adjacent clusters with the same
// destination. We do not consider the switches that are lowered with a mix
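Aside (not part of the patch): the cold-callsite check added to InlineCost.cpp above reduces to a percentage comparison — with the default ColdCallSiteRelFreq of 2, a callsite is treated as cold when its block frequency is below 2% of the caller's entry frequency. A minimal standalone sketch of that arithmetic, using plain integers rather than LLVM's BlockFrequency/BranchProbability types:

#include <cstdint>

// Sketch only: "cold" here means the callsite executes less often than
// RelFreqPercent percent of the caller's entry block.
static bool isColdRelativeToEntry(uint64_t CallSiteFreq,
                                  uint64_t CallerEntryFreq,
                                  unsigned RelFreqPercent = 2) {
  // Same test as CallSiteFreq < CallerEntryFreq * ColdProb with ColdProb = 2/100.
  return CallSiteFreq * 100 < CallerEntryFreq * RelFreqPercent;
}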
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
index 2a736ec0379c..0e02850df349 100644
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -20,14 +20,6 @@ namespace llvm {
template <class NodeTy>
void IDFCalculator<NodeTy>::calculate(
SmallVectorImpl<BasicBlock *> &PHIBlocks) {
- // If we haven't computed dominator tree levels, do so now.
- if (DomLevels.empty()) {
- for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode());
- DFI != DFE; ++DFI) {
- DomLevels[*DFI] = DFI.getPathLength() - 1;
- }
- }
-
// Use a priority queue keyed on dominator tree level so that inserted nodes
// are handled from the bottom of the dominator tree upwards.
typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
@@ -37,7 +29,7 @@ void IDFCalculator<NodeTy>::calculate(
for (BasicBlock *BB : *DefBlocks) {
if (DomTreeNode *Node = DT.getNode(BB))
- PQ.push(std::make_pair(Node, DomLevels.lookup(Node)));
+ PQ.push({Node, Node->getLevel()});
}
SmallVector<DomTreeNode *, 32> Worklist;
@@ -72,7 +64,7 @@ void IDFCalculator<NodeTy>::calculate(
if (SuccNode->getIDom() == Node)
continue;
- unsigned SuccLevel = DomLevels.lookup(SuccNode);
+ const unsigned SuccLevel = SuccNode->getLevel();
if (SuccLevel > RootLevel)
continue;
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 7983d62c2f7a..f88d54b21e1e 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -400,8 +400,8 @@ static APInt getSizeWithOverflow(const SizeOffsetType &Data) {
/// \brief Compute the size of the object pointed by Ptr. Returns true and the
/// object size in Size if successful, and false otherwise.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
+/// If RoundToAlign is true, then Size is rounded up to the alignment of
+/// allocas, byval arguments, and global variables.
bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, ObjectSizeOpts Opts) {
ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), Opts);
diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationDiagnosticInfo.cpp
index e38e530c052d..eb259fd7a384 100644
--- a/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F)
: F(F), BFI(nullptr) {
- if (!F->getContext().getDiagnosticHotnessRequested())
+ if (!F->getContext().getDiagnosticsHotnessRequested())
return;
// First create a dominator tree.
@@ -155,6 +155,13 @@ void OptimizationRemarkEmitter::emit(
DiagnosticInfoOptimizationBase &OptDiagBase) {
auto &OptDiag = cast<DiagnosticInfoIROptimization>(OptDiagBase);
computeHotness(OptDiag);
+ // If a diagnostic has a hotness value, then only emit it if its hotness
+ // meets the threshold.
+ if (OptDiag.getHotness() &&
+ *OptDiag.getHotness() <
+ F->getContext().getDiagnosticsHotnessThreshold()) {
+ return;
+ }
yaml::Output *Out = F->getContext().getDiagnosticsOutputFile();
if (Out) {
@@ -176,7 +183,7 @@ OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) {
BlockFrequencyInfo *BFI;
- if (Fn.getContext().getDiagnosticHotnessRequested())
+ if (Fn.getContext().getDiagnosticsHotnessRequested())
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
else
BFI = nullptr;
@@ -198,7 +205,7 @@ OptimizationRemarkEmitterAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
BlockFrequencyInfo *BFI;
- if (F.getContext().getDiagnosticHotnessRequested())
+ if (F.getContext().getDiagnosticsHotnessRequested())
BFI = &AM.getResult<BlockFrequencyAnalysis>(F);
else
BFI = nullptr;
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 63ef8d28d44a..900487323005 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -10,28 +10,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/RegionInfoImpl.h"
-#include "llvm/Analysis/RegionIterator.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#ifndef NDEBUG
#include "llvm/Analysis/RegionPrinter.h"
#endif
+#include "llvm/Analysis/RegionInfoImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "region"
namespace llvm {
+
template class RegionBase<RegionTraits<Function>>;
template class RegionNodeBase<RegionTraits<Function>>;
template class RegionInfoBase<RegionTraits<Function>>;
-}
+
+} // end namespace llvm
STATISTIC(numRegions, "The # of regions");
STATISTIC(numSimpleRegions, "The # of simple regions");
@@ -44,7 +45,6 @@ VerifyRegionInfoX(
cl::location(RegionInfoBase<RegionTraits<Function>>::VerifyRegionInfo),
cl::desc("Verify region info (time consuming)"));
-
static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
cl::location(RegionInfo::printStyle),
cl::Hidden,
@@ -56,7 +56,6 @@ static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
clEnumValN(Region::PrintRN, "rn",
"print regions in detail with element_iterator")));
-
//===----------------------------------------------------------------------===//
// Region implementation
//
@@ -68,20 +67,15 @@ Region::Region(BasicBlock *Entry, BasicBlock *Exit,
}
-Region::~Region() { }
+Region::~Region() = default;
//===----------------------------------------------------------------------===//
// RegionInfo implementation
//
-RegionInfo::RegionInfo() :
- RegionInfoBase<RegionTraits<Function>>() {
-
-}
+RegionInfo::RegionInfo() = default;
-RegionInfo::~RegionInfo() {
-
-}
+RegionInfo::~RegionInfo() = default;
bool RegionInfo::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
@@ -126,9 +120,7 @@ RegionInfoPass::RegionInfoPass() : FunctionPass(ID) {
initializeRegionInfoPassPass(*PassRegistry::getPassRegistry());
}
-RegionInfoPass::~RegionInfoPass() {
-
-}
+RegionInfoPass::~RegionInfoPass() = default;
bool RegionInfoPass::runOnFunction(Function &F) {
releaseMemory();
@@ -181,10 +173,12 @@ INITIALIZE_PASS_END(RegionInfoPass, "regions",
// the link time optimization.
namespace llvm {
+
FunctionPass *createRegionInfoPass() {
return new RegionInfoPass();
}
-}
+
+} // end namespace llvm
//===----------------------------------------------------------------------===//
// RegionInfoAnalysis implementation
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 73a95ec405c7..678ad3af5e85 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -157,6 +157,11 @@ static cl::opt<unsigned> MaxConstantEvolvingDepth(
"scalar-evolution-max-constant-evolving-depth", cl::Hidden,
cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
+static cl::opt<unsigned>
+ MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive SExt/ZExt"),
+ cl::init(8));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -1285,8 +1290,8 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
namespace {
struct ExtendOpTraitsBase {
- typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(
- const SCEV *, Type *, ScalarEvolution::ExtendCacheTy &Cache);
+ typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *,
+ unsigned);
};
// Used to make code generic over signed and unsigned overflow.
@@ -1315,9 +1320,8 @@ struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
}
};
-const ExtendOpTraitsBase::GetExtendExprTy
- ExtendOpTraits<SCEVSignExtendExpr>::GetExtendExpr =
- &ScalarEvolution::getSignExtendExprCached;
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
@@ -1332,9 +1336,8 @@ struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
}
};
-const ExtendOpTraitsBase::GetExtendExprTy
- ExtendOpTraits<SCEVZeroExtendExpr>::GetExtendExpr =
- &ScalarEvolution::getZeroExtendExprCached;
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
}
// The recurrence AR has been shown to have no signed/unsigned wrap or something
@@ -1346,8 +1349,7 @@ const ExtendOpTraitsBase::GetExtendExprTy
// "sext/zext(PostIncAR)"
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
- ScalarEvolution *SE,
- ScalarEvolution::ExtendCacheTy &Cache) {
+ ScalarEvolution *SE, unsigned Depth) {
auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
@@ -1394,9 +1396,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
const SCEV *OperandExtendedStart =
- SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Cache),
- (SE->*GetExtendExpr)(Step, WideTy, Cache));
- if ((SE->*GetExtendExpr)(Start, WideTy, Cache) == OperandExtendedStart) {
+ SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
+ (SE->*GetExtendExpr)(Step, WideTy, Depth));
+ if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
if (PreAR && AR->getNoWrapFlags(WrapType)) {
// If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
// or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
@@ -1422,16 +1424,16 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
ScalarEvolution *SE,
- ScalarEvolution::ExtendCacheTy &Cache) {
+ unsigned Depth) {
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
- const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Cache);
+ const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
if (!PreStart)
- return (SE->*GetExtendExpr)(AR->getStart(), Ty, Cache);
+ return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);
- return SE->getAddExpr(
- (SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, Cache),
- (SE->*GetExtendExpr)(PreStart, Ty, Cache));
+ return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
+ Depth),
+ (SE->*GetExtendExpr)(PreStart, Ty, Depth));
}
// Try to prove away overflow by looking at "nearby" add recurrences. A
@@ -1511,31 +1513,8 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
return false;
}
-const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) {
- // Use the local cache to prevent exponential behavior of
- // getZeroExtendExprImpl.
- ExtendCacheTy Cache;
- return getZeroExtendExprCached(Op, Ty, Cache);
-}
-
-/// Query \p Cache before calling getZeroExtendExprImpl. If there is no
-/// related entry in the \p Cache, call getZeroExtendExprImpl and save
-/// the result in the \p Cache.
-const SCEV *ScalarEvolution::getZeroExtendExprCached(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache) {
- auto It = Cache.find({Op, Ty});
- if (It != Cache.end())
- return It->second;
- const SCEV *ZExt = getZeroExtendExprImpl(Op, Ty, Cache);
- auto InsertResult = Cache.insert({{Op, Ty}, ZExt});
- assert(InsertResult.second && "Expect the key was not in the cache");
- (void)InsertResult;
- return ZExt;
-}
-
-/// The real implementation of getZeroExtendExpr.
-const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache) {
+const SCEV *
+ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -1545,11 +1524,11 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getZeroExtendExprCached(SZ->getOperand(), Ty, Cache);
+ return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
@@ -1559,6 +1538,12 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ if (Depth > MaxExtDepth) {
+ SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+ }
// zext(trunc(x)) --> zext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
@@ -1593,8 +1578,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// we don't need to do any further analysis.
if (AR->hasNoUnsignedWrap())
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1618,22 +1603,29 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
- const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *ZAdd =
- getZeroExtendExprCached(getAddExpr(Start, ZMul), WideTy, Cache);
- const SCEV *WideStart = getZeroExtendExprCached(Start, WideTy, Cache);
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step,
+ SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul,
+ SCEV::FlagAnyWrap,
+ Depth + 1),
+ WideTy, Depth + 1);
+ const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
- getZeroExtendExprCached(CastedMaxBECount, WideTy, Cache);
- const SCEV *OperandExtendedAdd = getAddExpr(
- WideStart, getMulExpr(WideMaxBECount, getZeroExtendExprCached(
- Step, WideTy, Cache)));
+ getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getZeroExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExprCached(Step, Ty, Cache), L,
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
@@ -1641,15 +1633,19 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
- getSignExtendExpr(Step, WideTy)));
+ getSignExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NW, which is propagated to this AddRec.
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
}
}
@@ -1680,8 +1676,9 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExprCached(Step, Ty, Cache), L,
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
@@ -1697,8 +1694,10 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
}
}
@@ -1706,8 +1705,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
@@ -1718,8 +1717,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// commute the zero extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
- Ops.push_back(getZeroExtendExprCached(Op, Ty, Cache));
- return getAddExpr(Ops, SCEV::FlagNUW);
+ Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
+ return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
}
}
@@ -1732,31 +1731,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
return S;
}
-const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) {
- // Use the local cache to prevent exponential behavior of
- // getSignExtendExprImpl.
- ExtendCacheTy Cache;
- return getSignExtendExprCached(Op, Ty, Cache);
-}
-
-/// Query \p Cache before calling getSignExtendExprImpl. If there is no
-/// related entry in the \p Cache, call getSignExtendExprImpl and save
-/// the result in the \p Cache.
-const SCEV *ScalarEvolution::getSignExtendExprCached(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache) {
- auto It = Cache.find({Op, Ty});
- if (It != Cache.end())
- return It->second;
- const SCEV *SExt = getSignExtendExprImpl(Op, Ty, Cache);
- auto InsertResult = Cache.insert({{Op, Ty}, SExt});
- assert(InsertResult.second && "Expect the key was not in the cache");
- (void)InsertResult;
- return SExt;
-}
-
-/// The real implementation of getSignExtendExpr.
-const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache) {
+const SCEV *
+ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -1766,15 +1742,15 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
- return getSignExtendExprCached(SS->getOperand(), Ty, Cache);
+ return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1);
// sext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getZeroExtendExpr(SZ->getOperand(), Ty);
+ return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
@@ -1784,6 +1760,13 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ // Limit recursion depth.
+ if (Depth > MaxExtDepth) {
+ SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+ }
// sext(trunc(x)) --> sext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
@@ -1809,8 +1792,9 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
C2.ugt(C1) && C2.isPowerOf2())
- return getAddExpr(getSignExtendExprCached(SC1, Ty, Cache),
- getSignExtendExprCached(SMul, Ty, Cache));
+ return getAddExpr(getSignExtendExpr(SC1, Ty, Depth + 1),
+ getSignExtendExpr(SMul, Ty, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
}
}
}
@@ -1821,8 +1805,8 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// commute the sign extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
- Ops.push_back(getSignExtendExprCached(Op, Ty, Cache));
- return getAddExpr(Ops, SCEV::FlagNSW);
+ Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1));
+ return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
}
}
// If the input value is a chrec scev, and we can prove that the value
@@ -1845,8 +1829,8 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// we don't need to do any further analysis.
if (AR->hasNoSignedWrap())
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExprCached(Step, Ty, Cache), L, SCEV::FlagNSW);
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1870,22 +1854,29 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
- const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *SAdd =
- getSignExtendExprCached(getAddExpr(Start, SMul), WideTy, Cache);
- const SCEV *WideStart = getSignExtendExprCached(Start, WideTy, Cache);
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step,
+ SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul,
+ SCEV::FlagAnyWrap,
+ Depth + 1),
+ WideTy, Depth + 1);
+ const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
- getZeroExtendExpr(CastedMaxBECount, WideTy);
- const SCEV *OperandExtendedAdd = getAddExpr(
- WideStart, getMulExpr(WideMaxBECount, getSignExtendExprCached(
- Step, WideTy, Cache)));
+ getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getSignExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExprCached(Step, Ty, Cache), L,
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
@@ -1893,7 +1884,9 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
- getZeroExtendExpr(Step, WideTy)));
+ getZeroExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// If AR wraps around then
//
@@ -1907,8 +1900,10 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
}
}
@@ -1939,9 +1934,8 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExprCached(Step, Ty, Cache), L,
- AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
@@ -1955,25 +1949,26 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
C2.isPowerOf2()) {
- Start = getSignExtendExprCached(Start, Ty, Cache);
+ Start = getSignExtendExpr(Start, Ty, Depth + 1);
const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
AR->getNoWrapFlags());
- return getAddExpr(Start, getSignExtendExprCached(NewAR, Ty, Cache));
+ return getAddExpr(Start, getSignExtendExpr(NewAR, Ty, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
}
}
if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
// If the input value is provably positive and we could not simplify
// away the sext build a zext instead.
if (isKnownNonNegative(Op))
- return getZeroExtendExpr(Op, Ty);
+ return getZeroExtendExpr(Op, Ty, Depth + 1);
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
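Aside (not part of the patch): the ScalarEvolution changes above replace the per-query memo cache with an explicit recursion depth, cut off by the new -scalar-evolution-max-ext-depth option (default 8). A simplified sketch of that depth-limiting pattern, using made-up Node helpers rather than real SCEV APIs:

// Hypothetical stand-in for a SCEV node; not an LLVM type.
struct Node { Node *Inner; };            // Inner != nullptr models ext(ext(x))

static const unsigned MaxExtDepthSketch = 8;

// Once the recursion gets deeper than the cap, stop simplifying and wrap the
// operand as-is, which bounds the total work on deeply nested expressions.
static Node *extendSketch(Node *Op, unsigned Depth) {
  if (Depth > MaxExtDepthSketch)
    return new Node{Op};                 // give up folding at this depth
  if (Op->Inner)                         // ext(ext(x)) --> ext(x): recurse into x
    return extendSketch(Op->Inner, Depth + 1);
  return new Node{Op};                   // innermost operand: extend it directly
}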
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 92328f6e5efd..f938a9a52065 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -89,8 +89,9 @@ TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}
-int TargetTransformInfo::getUserCost(const User *U) const {
- int Cost = TTIImpl->getUserCost(U);
+int TargetTransformInfo::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands) const {
+ int Cost = TTIImpl->getUserCost(U, Operands);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -116,8 +117,8 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
}
void TargetTransformInfo::getUnrollingPreferences(
- Loop *L, UnrollingPreferences &UP) const {
- return TTIImpl->getUnrollingPreferences(L, UP);
+ Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
+ return TTIImpl->getUnrollingPreferences(L, SE, UP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index cd9972ab56a6..86c528de267a 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -23,10 +23,10 @@
//
// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
-// !0 = metadata !{ metadata !"an example type tree" }
-// !1 = metadata !{ metadata !"int", metadata !0 }
-// !2 = metadata !{ metadata !"float", metadata !0 }
-// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+// !0 = !{ !"an example type tree" }
+// !1 = !{ !"int", !0 }
+// !2 = !{ !"float", !0 }
+// !3 = !{ !"const float", !2, i64 1 }
//
// The first field is an identity field. It can be any value, usually
// an MDString, which uniquely identifies the type. The most important
@@ -74,13 +74,13 @@
// instruction. The base type is !4 (struct B), the access type is !2 (scalar
// type short) and the offset is 4.
//
-// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
-// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
-// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node
-// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node
-// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
+// !0 = !{!"Simple C/C++ TBAA"}
+// !1 = !{!"omnipotent char", !0} // Scalar type node
+// !2 = !{!"short", !1} // Scalar type node
+// !3 = !{!"A", !2, i64 0} // Struct type node
+// !4 = !{!"B", !2, i64 0, !3, i64 4}
// // Struct type node
-// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node
+// !5 = !{!4, !2, i64 4} // Path tag node
//
// The struct type nodes and the scalar type nodes form a type DAG.
// Root (!0)
diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp
index f24f22c88a8a..b19a07a9066b 100644
--- a/lib/BinaryFormat/Magic.cpp
+++ b/lib/BinaryFormat/Magic.cpp
@@ -191,8 +191,8 @@ file_magic llvm::identify_magic(StringRef Magic) {
}
break;
- case 0x64: // x86-64 Windows.
- if (Magic[1] == char(0x86))
+ case 0x64: // x86-64 or ARM64 Windows.
+ if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
return file_magic::coff_object;
break;
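Aside (not part of the patch): the Magic.cpp hunk works because a COFF object begins with a little-endian 16-bit machine field — 0x8664 for x86-64 and 0xaa64 for ARM64 — so byte 0 is 0x64 in both cases and byte 1 tells them apart. A small sketch of that check (simplified; not the real identify_magic signature):

#include <cstddef>

// Sketch only: recognize a COFF object from its machine field, accepting
// both AMD64 (0x8664) and ARM64 (0xaa64), stored little-endian.
static bool looksLikeCoffObject(const unsigned char *Buf, std::size_t Len) {
  if (Len < 2 || Buf[0] != 0x64)
    return false;
  return Buf[1] == 0x86 || Buf[1] == 0xaa;
}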
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 0629c2d326ae..1ebef3173135 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -5360,8 +5360,9 @@ const std::error_category &llvm::BitcodeErrorCategory() {
return *ErrorCategory;
}
-static Expected<StringRef> readStrtab(BitstreamCursor &Stream) {
- if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID))
+static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream,
+ unsigned Block, unsigned RecordID) {
+ if (Stream.EnterSubBlock(Block))
return error("Invalid record");
StringRef Strtab;
@@ -5382,7 +5383,7 @@ static Expected<StringRef> readStrtab(BitstreamCursor &Stream) {
case BitstreamEntry::Record:
StringRef Blob;
SmallVector<uint64_t, 1> Record;
- if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB)
+ if (Stream.readRecord(Entry.ID, Record, &Blob) == RecordID)
Strtab = Blob;
break;
}
@@ -5450,7 +5451,8 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
}
if (Entry.ID == bitc::STRTAB_BLOCK_ID) {
- Expected<StringRef> Strtab = readStrtab(Stream);
+ Expected<StringRef> Strtab =
+ readBlobInRecord(Stream, bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB);
if (!Strtab)
return Strtab.takeError();
// This string table is used by every preceding bitcode module that does
@@ -5462,6 +5464,28 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
break;
I->Strtab = *Strtab;
}
+ // Similarly, the string table is used by every preceding symbol table;
+ // normally there will be just one unless the bitcode file was created
+ // by binary concatenation.
+ if (!F.Symtab.empty() && F.StrtabForSymtab.empty())
+ F.StrtabForSymtab = *Strtab;
+ continue;
+ }
+
+ if (Entry.ID == bitc::SYMTAB_BLOCK_ID) {
+ Expected<StringRef> SymtabOrErr =
+ readBlobInRecord(Stream, bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB);
+ if (!SymtabOrErr)
+ return SymtabOrErr.takeError();
+
+ // We can expect the bitcode file to have multiple symbol tables if it
+ // was created by binary concatenation. In that case we silently
+ // ignore any subsequent symbol tables, which is fine because this is a
+ // low level function. The client is expected to notice that the number
+ // of modules in the symbol table does not match the number of modules
+ // in the input file and regenerate the symbol table.
+ if (F.Symtab.empty())
+ F.Symtab = *SymtabOrErr;
continue;
}
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index feeba31908ae..b2b1ea6de374 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -29,10 +29,12 @@
#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/IRSymtab.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SHA1.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <map>
@@ -3820,6 +3822,38 @@ void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) {
Stream->ExitBlock();
}
+void BitcodeWriter::writeSymtab() {
+ assert(!WroteStrtab && !WroteSymtab);
+
+ // If any module has module-level inline asm, we will require a registered asm
+ // parser for the target so that we can create an accurate symbol table for
+ // the module.
+ for (Module *M : Mods) {
+ if (M->getModuleInlineAsm().empty())
+ continue;
+
+ std::string Err;
+ const Triple TT(M->getTargetTriple());
+ const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
+ if (!T || !T->hasMCAsmParser())
+ return;
+ }
+
+ WroteSymtab = true;
+ SmallVector<char, 0> Symtab;
+ // The irsymtab::build function may be unable to create a symbol table if the
+ // module is malformed (e.g. it contains an invalid alias). Writing a symbol
+ // table is not required for correctness, but we still want to be able to
+ // write malformed modules to bitcode files, so swallow the error.
+ if (Error E = irsymtab::build(Mods, Symtab, StrtabBuilder, Alloc)) {
+ consumeError(std::move(E));
+ return;
+ }
+
+ writeBlob(bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB,
+ {Symtab.data(), Symtab.size()});
+}
+
void BitcodeWriter::writeStrtab() {
assert(!WroteStrtab);
@@ -3843,6 +3877,15 @@ void BitcodeWriter::writeModule(const Module *M,
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index,
bool GenerateHash, ModuleHash *ModHash) {
+ assert(!WroteStrtab);
+
+ // The Mods vector is used by irsymtab::build, which requires non-const
+ // Modules in case it needs to materialize metadata. But the bitcode writer
+ // requires that the module is materialized, so we can cast to non-const here,
+ // after checking that it is in fact materialized.
+ assert(M->isMaterialized());
+ Mods.push_back(const_cast<Module *>(M));
+
ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream,
ShouldPreserveUseListOrder, Index,
GenerateHash, ModHash);
@@ -3875,6 +3918,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
BitcodeWriter Writer(Buffer);
Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash,
ModHash);
+ Writer.writeSymtab();
Writer.writeStrtab();
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
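The writer now has three phases: one writeModule call per module (each module is also queued for the symbol table), then writeSymtab, then writeStrtab last, since the symbol table adds its strings to the shared string table before it is flushed. A minimal sketch of a multi-module writer in that order, assuming writeModule's trailing parameters keep their defaults:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Bitcode/BitcodeWriter.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    void writeTwoModules(SmallVectorImpl<char> &Buffer, const Module &A,
                         const Module &B) {
      BitcodeWriter Writer(Buffer);
      Writer.writeModule(&A);   // both modules must be materialized
      Writer.writeModule(&B);
      Writer.writeSymtab();     // builds the combined irsymtab; may be a no-op
      Writer.writeStrtab();     // string table goes last
    }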
diff --git a/lib/Bitcode/Writer/LLVMBuild.txt b/lib/Bitcode/Writer/LLVMBuild.txt
index a07c280fa9e3..ef6dc9f901e2 100644
--- a/lib/Bitcode/Writer/LLVMBuild.txt
+++ b/lib/Bitcode/Writer/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = BitWriter
parent = Bitcode
-required_libraries = Analysis Core MC Support
+required_libraries = Analysis Core MC Object Support
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c48fcaa7b0d1..ff427c9a0d75 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -631,7 +631,9 @@ void AsmPrinter::EmitFunctionHeader() {
const Function *F = MF->getFunction();
if (isVerbose())
- OutStreamer->GetCommentOS() << "-- Begin function " << F->getName() << '\n';
+ OutStreamer->GetCommentOS()
+ << "-- Begin function "
+ << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n';
// Print out constants referenced by the function
EmitConstantPool();
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index e94616fd5900..a81d56e9618b 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -365,7 +365,7 @@ static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
const MachineFunction *MF) {
// Skip this instruction if it has the same location as the previous one.
- if (DL == CurFn->LastLoc)
+ if (!DL || DL == PrevInstLoc)
return;
const DIScope *Scope = DL.get()->getScope();
@@ -385,11 +385,11 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
if (!CurFn->HaveLineInfo)
CurFn->HaveLineInfo = true;
unsigned FileId = 0;
- if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
+ if (PrevInstLoc.get() && PrevInstLoc->getFile() == DL->getFile())
FileId = CurFn->LastFileId;
else
FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
- CurFn->LastLoc = DL;
+ PrevInstLoc = DL;
unsigned FuncId = CurFn->FuncId;
if (const DILocation *SiteLoc = DL->getInlinedAt()) {
@@ -2150,9 +2150,23 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
if (!Asm || !CurFn || MI->isDebugValue() ||
MI->getFlag(MachineInstr::FrameSetup))
return;
+
+ // If the first instruction of a new MBB has no location, find the first
+ // instruction with a location and use that.
DebugLoc DL = MI->getDebugLoc();
- if (DL == PrevInstLoc || !DL)
+ if (!DL && MI->getParent() != PrevInstBB) {
+ for (const auto &NextMI : *MI->getParent()) {
+ DL = NextMI.getDebugLoc();
+ if (DL)
+ break;
+ }
+ }
+ PrevInstBB = MI->getParent();
+
+ // If we still don't have a debug location, don't record a location.
+ if (!DL)
return;
+
maybeRecordLocation(DL, Asm->MF);
}
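With the per-function LastLoc gone, duplicate-location suppression runs off the single PrevInstLoc field, and a basic block whose first instructions carry no location borrows the first location that appears anywhere in that block. The borrowed-location scan in isolation (a sketch mirroring the loop above, not the class member itself):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/IR/DebugLoc.h"
    using namespace llvm;

    // Return MI's own location if it has one, otherwise the first location
    // found in MI's block; a null DebugLoc means the block has none at all.
    static DebugLoc lineForBlockStart(const MachineInstr &MI) {
      if (DebugLoc DL = MI.getDebugLoc())
        return DL;
      for (const MachineInstr &Other : *MI.getParent())
        if (DebugLoc DL = Other.getDebugLoc())
          return DL;
      return DebugLoc();
    }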
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 2cd495aec6dc..fd8f60425c24 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -118,7 +118,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LocalVariable, 1> Locals;
- DebugLoc LastLoc;
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
unsigned FuncId = 0;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index dc39d1e6cb52..d4a90eeabe15 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -245,17 +245,6 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
CURanges.back().setEnd(Range.getEnd());
}
-DIE::value_iterator
-DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label, const MCSymbol *Sec) {
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- return addLabel(Die, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- Label);
- return addSectionDelta(Die, Attribute, Label, Sec);
-}
-
void DwarfCompileUnit::initStmtList() {
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym =
@@ -380,15 +369,6 @@ void DwarfCompileUnit::constructScopeDIE(
FinalChildren.push_back(std::move(ScopeDIE));
}
-DIE::value_iterator
-DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo) {
- return Die.addValue(DIEValueAllocator, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- new (DIEValueAllocator) DIEDelta(Hi, Lo));
-}
-
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 3c2fb8d99db7..e38672792867 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -127,10 +127,6 @@ public:
void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
- /// addSectionDelta - Add a label delta attribute data and value.
- DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo);
-
DwarfCompileUnit &getCU() override { return *this; }
unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
@@ -151,12 +147,6 @@ public:
void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
- /// addSectionLabel - Add a Dwarf section label attribute data and value.
- ///
- DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label,
- const MCSymbol *Sec);
-
/// \brief Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 708f5f7536ff..4f4ebfc56297 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1587,6 +1587,26 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) {
sizeof(Ty->getOffset()));
}
+DIE::value_iterator
+DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ return Die.addValue(DIEValueAllocator, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
+}
+
+DIE::value_iterator
+DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ return addLabel(Die, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Label);
+ return addSectionDelta(Die, Attribute, Label, Sec);
+}
+
bool DwarfTypeUnit::isDwoUnit() const {
// Since there are no skeleton type units, all type units are dwo type units
// when split DWARF is being used.
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 7acad2cbd89f..4cc01b3298d4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -291,6 +291,15 @@ public:
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+ /// addSectionDelta - Add a label delta attribute data and value.
+ DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// Add a Dwarf section label attribute data and value.
+ DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label,
+ const MCSymbol *Sec);
+
protected:
~DwarfUnit();
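Moving addSectionDelta/addSectionLabel from DwarfCompileUnit up to DwarfUnit lets type units emit section-relative attributes too; the behaviour itself is unchanged. The only decision either helper makes is which form to use, and that depends solely on the DWARF version, roughly as below (include path is llvm/BinaryFormat/Dwarf.h on trees of this vintage, llvm/Support/Dwarf.h on older ones):

    #include "llvm/BinaryFormat/Dwarf.h"

    // DWARF v4 introduced DW_FORM_sec_offset for section offsets; older
    // producers fall back to a plain 4-byte data form, as the code above does.
    static llvm::dwarf::Form sectionOffsetForm(unsigned DwarfVersion) {
      return DwarfVersion >= 4 ? llvm::dwarf::DW_FORM_sec_offset
                               : llvm::dwarf::DW_FORM_data4;
    }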
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index cb31c21293f4..b50e76f2e3ba 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1662,6 +1662,7 @@ class MemCmpExpansion {
PHINode *PhiRes;
bool IsUsedForZeroCmp;
const DataLayout &DL;
+ IRBuilder<> Builder;
unsigned calculateNumBlocks(unsigned Size);
void createLoadCmpBlocks();
@@ -1671,13 +1672,14 @@ class MemCmpExpansion {
void emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
unsigned GEPIndex);
Value *getCompareLoadPairs(unsigned Index, unsigned Size,
- unsigned &NumBytesProcessed, IRBuilder<> &Builder);
+ unsigned &NumBytesProcessed);
void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
unsigned &NumBytesProcessed);
void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex);
void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase(unsigned Size);
Value *getMemCmpEqZeroOneBlock(unsigned Size);
+ Value *getMemCmpOneBlock(unsigned Size);
unsigned getLoadSize(unsigned Size);
unsigned getNumLoads(unsigned Size);
@@ -1702,7 +1704,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
unsigned MaxLoadSize, unsigned LoadsPerBlock,
const DataLayout &TheDataLayout)
: CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock),
- DL(TheDataLayout) {
+ DL(TheDataLayout), Builder(CI) {
// A memcmp with zero-comparison with only one block of load and compare does
// not need to set up any extra blocks. This case could be handled in the DAG,
@@ -1710,7 +1712,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
// we choose to handle this case too to avoid fragmented lowering.
IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
NumBlocks = calculateNumBlocks(Size);
- if (!IsUsedForZeroCmp || NumBlocks != 1) {
+ if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) {
BasicBlock *StartBlock = CI->getParent();
EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
setupEndBlockPHINodes();
@@ -1731,7 +1733,6 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
}
- IRBuilder<> Builder(CI->getContext());
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
}
@@ -1754,8 +1755,6 @@ void MemCmpExpansion::createResultBlock() {
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index,
unsigned GEPIndex) {
- IRBuilder<> Builder(CI->getContext());
-
Value *Source1 = CI->getArgOperand(0);
Value *Source2 = CI->getArgOperand(1);
@@ -1811,8 +1810,7 @@ unsigned MemCmpExpansion::getLoadSize(unsigned Size) {
/// This is used in the case where the memcmp() call is compared equal or not
/// equal to zero.
Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
- unsigned &NumBytesProcessed,
- IRBuilder<> &Builder) {
+ unsigned &NumBytesProcessed) {
std::vector<Value *> XorList, OrList;
Value *Diff;
@@ -1910,8 +1908,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
- IRBuilder<> Builder(CI->getContext());
- Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed, Builder);
+ Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed);
BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
? EndBlock
@@ -1946,8 +1943,6 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
return;
}
- IRBuilder<> Builder(CI->getContext());
-
Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
assert(LoadSize <= MaxLoadSize && "Unexpected load type");
@@ -1975,9 +1970,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
if (DL.isLittleEndian()) {
- Function *F = LoadCmpBlocks[Index]->getParent();
-
- Function *Bswap = Intrinsic::getDeclaration(F->getParent(),
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::bswap, LoadSizeType);
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
@@ -1995,16 +1988,13 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]);
}
- Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
-
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
- ConstantInt::get(Diff->getType(), 0));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
? EndBlock
: LoadCmpBlocks[Index + 1];
  // Branch to ResultBlock early if a difference was found. Otherwise, continue
  // to the next LoadCmpBlock or EndBlock.
- BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+ BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
Builder.Insert(CmpBr);
// Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
@@ -2020,8 +2010,6 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
// memcmp result. It compares the two loaded source values and returns -1 if
// src1 < src2 and 1 if src1 > src2.
void MemCmpExpansion::emitMemCmpResultBlock() {
- IRBuilder<> Builder(CI->getContext());
-
// Special case: if memcmp result is used in a zero equality, result does not
// need to be calculated and can simply return 1.
if (IsUsedForZeroCmp) {
@@ -2070,7 +2058,6 @@ unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) {
}
void MemCmpExpansion::setupResultBlockPHINodes() {
- IRBuilder<> Builder(CI->getContext());
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
Builder.SetInsertPoint(ResBlock.BB);
ResBlock.PhiSrc1 =
@@ -2080,8 +2067,6 @@ void MemCmpExpansion::setupResultBlockPHINodes() {
}
void MemCmpExpansion::setupEndBlockPHINodes() {
- IRBuilder<> Builder(CI->getContext());
-
Builder.SetInsertPoint(&EndBlock->front());
PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
}
@@ -2102,11 +2087,45 @@ Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) {
/// in the general case.
Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
unsigned NumBytesProcessed = 0;
- IRBuilder<> Builder(CI->getContext());
- Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed, Builder);
+ Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed);
return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
}
+/// A memcmp expansion that only has one block of load and compare can bypass
+/// the compare, branch, and phi IR that is required in the general case.
+Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
+ assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Load LoadSizeType from the base address.
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (DL.isLittleEndian() && Size != 1) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+ LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+ }
+
+ // TODO: Instead of comparing ULT, just subtract and return the difference?
+ Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
+ Type *I32 = Builder.getInt32Ty();
+ Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
+ ConstantInt::get(I32, 1));
+ return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
+}
+
// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
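For the new single-block case the whole memcmp collapses to two loads, an optional byte swap, and a compare/select chain with no branches. The semantics for a 4-byte compare, written out as plain C++ (a sketch of what the emitted IR computes, assuming the GCC/Clang bswap builtin; it is not the generated code itself):

    #include <cstdint>
    #include <cstring>

    int memcmp4(const void *P1, const void *P2) {
      uint32_t A, B;
      std::memcpy(&A, P1, 4);
      std::memcpy(&B, P2, 4);
      // On little-endian targets the loads are byte-swapped so that the
      // integer comparison matches memcmp's lexicographic order.
      A = __builtin_bswap32(A);
      B = __builtin_bswap32(B);
      int Lt = A < B ? -1 : 1;   // CmpULT feeding the first select
      return A != B ? Lt : 0;    // CmpNE feeding the second select
    }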
@@ -2114,6 +2133,10 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
getMemCmpExpansionZeroCase(Size);
+ // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
+ if (NumBlocks == 1 && NumLoadsPerBlock == 1)
+ return getMemCmpOneBlock(Size);
+
// This loop calls emitLoadCompareBlock for comparing Size bytes of the two
// memcmp sources. It starts with loading using the maximum load size set by
// the target. It processes any remaining bytes using a load size which is the
@@ -2218,7 +2241,6 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
const TargetLowering *TLI, const DataLayout *DL) {
NumMemCmpCalls++;
- IRBuilder<> Builder(CI->getContext());
// TTI call to check if target would like to expand memcmp. Also, get the
// MaxLoadSize.
@@ -4378,14 +4400,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// If the real base value actually came from an inttoptr, then the matcher
// will look through it and provide only the integer value. In that case,
// use it here.
- if (!ResultPtr && AddrMode.BaseReg) {
- ResultPtr =
- Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
- AddrMode.BaseReg = nullptr;
- } else if (!ResultPtr && AddrMode.Scale == 1) {
- ResultPtr =
- Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
- AddrMode.Scale = 0;
+ if (!DL->isNonIntegralPointerType(Addr->getType())) {
+ if (!ResultPtr && AddrMode.BaseReg) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.BaseReg = nullptr;
+ } else if (!ResultPtr && AddrMode.Scale == 1) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.Scale = 0;
+ }
}
if (!ResultPtr &&
@@ -4466,6 +4490,19 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
}
} else {
+ // We'd require a ptrtoint/inttoptr down the line, which we can't do for
+ // non-integral pointers, so in that case bail out now.
+ Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
+ Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
+ PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
+ PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
+ if (DL->isNonIntegralPointerType(Addr->getType()) ||
+ (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
+ (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
+ (AddrMode.BaseGV &&
+ DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
+ return false;
+
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
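Non-integral pointer address spaces are the ones listed after "ni:" in the module's datalayout (for example "e-m:e-ni:1" marks address space 1); ptrtoint/inttoptr have no defined meaning for them, so the address-mode sinking above must not synthesize those casts. The predicate guarding both hunks boils down to:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // True if it is safe to rebuild this address arithmetic through an
    // integer (ptrtoint/inttoptr) round trip, i.e. the pointer type is not
    // in a non-integral address space.
    static bool canSinkThroughInteger(const DataLayout &DL, Type *AddrTy) {
      return !DL.isNonIntegralPointerType(AddrTy);
    }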
@@ -6367,7 +6404,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
// Update PHI nodes in both successors. The original BB needs to be
- // replaced in one succesor's PHI nodes, because the branch comes now from
+ // replaced in one successor's PHI nodes, because the branch comes now from
// the newly generated BB (NewBB). In the other successor we need to add one
// incoming edge to the PHI nodes, because both branch instructions target
// now the same successor. Depending on the original branch condition
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 239bad2f5355..521037f9d206 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator --*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,34 +11,69 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
-
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
#define DEBUG_TYPE "irtranslator"
using namespace llvm;
char IRTranslator::ID = 0;
+
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
@@ -62,7 +97,7 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
+IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
}
@@ -71,7 +106,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-
unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
unsigned &ValReg = ValToVReg[&Val];
@@ -686,6 +720,26 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
+ case Intrinsic::exp:
+ MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::exp2:
+ MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::log:
+ MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::log2:
+ MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
case Intrinsic::fma:
MIRBuilder.buildInstr(TargetOpcode::G_FMA)
.addDef(getOrCreateVReg(CI))
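Each of the four new intrinsics is a straight one-to-one rewrite onto a generic opcode, with the call's result and argument registers reused as def and use. The mapping itself, pulled out as a small table for illustration (not a helper that exists upstream):

    #include "llvm/IR/Intrinsics.h"
    #include "llvm/Target/TargetOpcodes.h"
    using namespace llvm;

    // Opcode chosen for each of the unary FP intrinsics handled above;
    // returns 0 for anything this sketch does not cover.
    static unsigned genericOpcodeFor(Intrinsic::ID ID) {
      switch (ID) {
      case Intrinsic::exp:  return TargetOpcode::G_FEXP;
      case Intrinsic::exp2: return TargetOpcode::G_FEXP2;
      case Intrinsic::log:  return TargetOpcode::G_FLOG;
      case Intrinsic::log2: return TargetOpcode::G_FLOG2;
      default:              return 0;
      }
    }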
@@ -834,7 +888,6 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->front()))
return false;
-
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
// the region covered by the try.
MCSymbol *BeginSymbol = Context.createTempSymbol();
@@ -1195,7 +1248,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
TPC = &getAnalysis<TargetPassConfig>();
- ORE = make_unique<OptimizationRemarkEmitter>(&F);
+ ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
assert(PendingPHIs.empty() && "stale PHIs");
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 5466efd7e90f..860fc9a4f8b6 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,19 +11,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
#define DEBUG_TYPE "instructionselector"
using namespace llvm;
-InstructionSelector::InstructionSelector() {}
+InstructionSelector::InstructionSelector() = default;
bool InstructionSelector::constrainOperandRegToRegClass(
MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC,
@@ -33,8 +36,8 @@ bool InstructionSelector::constrainOperandRegToRegClass(
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- return llvm::constrainRegToClass(MRI, TII, RBI, I,
- I.getOperand(OpIdx).getReg(), RC);
+ return
+ constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC);
}
bool InstructionSelector::constrainSelectedInstRegOperands(
@@ -84,7 +87,6 @@ bool InstructionSelector::constrainSelectedInstRegOperands(
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
-
if (MO.isReg() && MO.getReg())
if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
return *VRegVal == Value;
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1d0d3dffa4c5..84b0a0ac4157 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -158,7 +158,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
// FIXME: Don't know how to handle secondary types yet.
- if (TypeIdx != 0)
+ if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
return UnableToLegalize;
MIRBuilder.setInstr(MI);
@@ -166,6 +166,20 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
+ NarrowTy.getSizeInBits();
+
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUndef(Dst);
+ DstRegs.push_back(Dst);
+ }
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ADD: {
// Expand in terms of carry-setting/consuming G_ADDE instructions.
int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
@@ -193,6 +207,58 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_EXTRACT: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ int64_t NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize;
+
+ SmallVector<unsigned, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ unsigned OpReg = MI.getOperand(0).getReg();
+ int64_t OpStart = MI.getOperand(2).getImm();
+ int64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned SrcStart = i * NarrowSize;
+
+ if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
+ // No part of the extract uses this subregister, ignore it.
+ continue;
+ } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is extracted, forward the value.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ }
+
+      // ExtractOffset is where within this source piece the extracted range
+      // begins, and SegSize is how many bits of this piece it covers.
+ int64_t ExtractOffset, SegSize;
+ if (OpStart < SrcStart) {
+ ExtractOffset = 0;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
+ } else {
+ ExtractOffset = OpStart - SrcStart;
+ SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
+ }
+
+ unsigned SegReg = SrcRegs[i];
+ if (ExtractOffset != 0 || SegSize != NarrowSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
+ }
+
+ DstRegs.push_back(SegReg);
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_INSERT: {
if (TypeIdx != 0)
return UnableToLegalize;
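The G_EXTRACT narrowing above is an interval intersection per piece: a source piece is skipped if it does not overlap the extracted range, forwarded whole if it is covered exactly, and otherwise a smaller extract is emitted; all surviving pieces are then merged. A worked example reproducing the offset/size arithmetic for an s128 source split into s32 pieces with a 48-bit extract at offset 40:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const int64_t NarrowSize = 32, OpStart = 40, OpSize = 48, NumParts = 4;
      for (int64_t i = 0; i < NumParts; ++i) {
        int64_t SrcStart = i * NarrowSize;
        if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
          std::printf("piece %lld: skipped\n", (long long)i);
          continue;
        }
        int64_t ExtractOffset = OpStart < SrcStart ? 0 : OpStart - SrcStart;
        int64_t SegSize =
            OpStart < SrcStart
                ? std::min(NarrowSize, OpStart + OpSize - SrcStart)
                : std::min(SrcStart + NarrowSize - OpStart, OpSize);
        // Contributing pieces: piece 1 -> offset 8, size 24 (bits [40,64));
        //                      piece 2 -> offset 0, size 24 (bits [64,88)).
        std::printf("piece %lld: offset %lld, size %lld\n", (long long)i,
                    (long long)ExtractOffset, (long long)SegSize);
      }
    }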
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 595802f2228b..76917aa9660d 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -1,4 +1,4 @@
-//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer -------==//
+//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,16 +18,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Type.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOpcodes.h"
+#include <algorithm>
+#include <cassert>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
-LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
+LegalizerInfo::LegalizerInfo() {
+ DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar;
+
// FIXME: these two can be legalized to the fundamental load/store Jakob
// proposed. Once loads & stores are supported.
DefaultActions[TargetOpcode::G_ANYEXT] = Legal;
@@ -42,6 +51,7 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar;
DefaultActions[TargetOpcode::G_FNEG] = Lower;
}
@@ -75,8 +85,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
// FIXME: the long-term plan calls for expansion in terms of load/store (if
// they're not legal).
- if (Aspect.Opcode == TargetOpcode::G_EXTRACT ||
- Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
+ if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
return std::make_pair(Legal, Aspect.Type);
@@ -172,21 +181,21 @@ Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect,
case Custom:
return Aspect.Type;
case NarrowScalar: {
- return findLegalType(Aspect,
- [](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
}
case WidenScalar: {
- return findLegalType(Aspect, [](LLT Ty) -> LLT {
+ return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT {
return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize();
});
}
case FewerElements: {
- return findLegalType(Aspect,
- [](LLT Ty) -> LLT { return Ty.halfElements(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); });
}
case MoreElements: {
- return findLegalType(Aspect,
- [](LLT Ty) -> LLT { return Ty.doubleElements(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
}
}
}
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 3c70013ea296..47c6214c0552 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -264,10 +264,13 @@ MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
}
MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
+ assert(MRI->getType(Tgt).isPointer() && "invalid branch destination");
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) {
+ assert(MRI->getType(Res) == LLT() || MRI->getType(Op) == LLT() ||
+ MRI->getType(Res) == MRI->getType(Op));
return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
}
@@ -364,27 +367,36 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) {
MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
unsigned Op) {
+ assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
+ assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+
unsigned Opcode = TargetOpcode::COPY;
if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_SEXT;
else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
+ else
+ assert(MRI->getType(Res) == MRI->getType(Op));
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
unsigned Op) {
+ assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
+ assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+
unsigned Opcode = TargetOpcode::COPY;
if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_ZEXT;
else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
+ else
+ assert(MRI->getType(Res) == MRI->getType(Op));
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
-
MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
LLT SrcTy = MRI->getType(Src);
LLT DstTy = MRI->getType(Dst);
@@ -466,7 +478,7 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
}
MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) {
- return buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Res);
+ return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res);
}
MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
@@ -482,6 +494,9 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
"input operands do not cover output register");
#endif
+ if (Ops.size() == 1)
+ return buildCast(Res, Ops[0]);
+
MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES);
MIB.addDef(Res);
for (unsigned i = 0; i < Ops.size(); ++i)
@@ -511,8 +526,11 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
unsigned Op, unsigned Index) {
+ assert(Index + MRI->getType(Op).getSizeInBits() <=
+ MRI->getType(Res).getSizeInBits() &&
+ "insertion past the end of a register");
+
if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) {
- assert(Index == 0 && "insertion past the end of a register");
return buildCast(Res, Op);
}
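buildMerge now tolerates a single source operand by degenerating to buildCast (a plain COPY when the types already match), which keeps callers such as the new G_EXTRACT narrowing simple when only one piece survives; buildInsert gains the matching bounds assertion. A sketch of the degenerate call:

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    using namespace llvm;

    // With a single source, this emits a COPY (via buildCast) rather than a
    // G_MERGE_VALUES with one input.
    void mergeSinglePiece(MachineIRBuilder &MIRBuilder, unsigned Res,
                          unsigned OnlyPiece) {
      MIRBuilder.buildMerge(Res, {OnlyPiece});
    }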
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 2eb3cdee694d..677941dbbf6d 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect -*- C++ -*-==//
+//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -12,18 +12,39 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
#define DEBUG_TYPE "regbankselect"
@@ -37,6 +58,7 @@ static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
"Use the Greedy mode (best local mapping)")));
char RegBankSelect::ID = 0;
+
INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers",
false, false);
@@ -48,8 +70,7 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
false)
RegBankSelect::RegBankSelect(Mode RunningMode)
- : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr),
- MBFI(nullptr), MBPI(nullptr), OptMode(RunningMode) {
+ : MachineFunctionPass(ID), OptMode(RunningMode) {
initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
@@ -72,7 +93,7 @@ void RegBankSelect::init(MachineFunction &MF) {
MBPI = nullptr;
}
MIRBuilder.setMF(MF);
- MORE = make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
}
void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -133,9 +154,11 @@ bool RegBankSelect::repairReg(
TargetRegisterInfo::isPhysicalRegister(Dst)) &&
"We are about to create several defs for Dst");
- // Build the instruction used to repair, then clone it at the right places.
- MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src);
- MI->removeFromParent();
+ // Build the instruction used to repair, then clone it at the right
+ // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+ // same types because the type is a placeholder when this function is called.
+ MachineInstr *MI =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
<< '\n');
// TODO:
@@ -202,11 +225,11 @@ uint64_t RegBankSelect::getRepairCost(
RBI->copyCost(*DesiredRegBrank, *CurRegBank,
RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI));
// TODO: use a dedicated constant for ImpossibleCost.
- if (Cost != UINT_MAX)
+ if (Cost != std::numeric_limits<unsigned>::max())
return Cost;
// Return the legalization cost of that repairing.
}
- return UINT_MAX;
+ return std::numeric_limits<unsigned>::max();
}
const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
@@ -352,7 +375,7 @@ void RegBankSelect::tryAvoidingSplit(
          // the repairing cost because of the PHIs already processed
// as already stated.
// Though the code will be correct.
- assert(0 && "Repairing cost may not be accurate");
+ assert(false && "Repairing cost may not be accurate");
} else {
// We need to do non-local repairing. Basically, patch all
          // the uses (i.e., phis) that we already processed.
@@ -450,7 +473,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
uint64_t RepairCost = getRepairCost(MO, ValMapping);
// This is an impossible to repair cost.
- if (RepairCost == UINT_MAX)
+ if (RepairCost == std::numeric_limits<unsigned>::max())
continue;
// Bias used for splitting: 5%.
@@ -535,9 +558,11 @@ bool RegBankSelect::applyMapping(
llvm_unreachable("Other kind should not happen");
}
}
+
// Second, rewrite the instruction.
DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
RBI->applyMapping(OpdMapper);
+
return true;
}
@@ -638,11 +663,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
RepairingPlacement::RepairingKind Kind)
// Default is, we are going to insert code to repair OpIdx.
- : Kind(Kind),
- OpIdx(OpIdx),
- CanMaterialize(Kind != RepairingKind::Impossible),
- HasSplit(false),
- P(P) {
+ : Kind(Kind), OpIdx(OpIdx),
+ CanMaterialize(Kind != RepairingKind::Impossible), P(P) {
const MachineOperand &MO = MI.getOperand(OpIdx);
assert(MO.isReg() && "Trying to repair a non-reg operand");
@@ -847,7 +869,7 @@ bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
}
RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
- : LocalCost(0), NonLocalCost(0), LocalFreq(LocalFreq.getFrequency()) {}
+ : LocalFreq(LocalFreq.getFrequency()) {}
bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
// Check if this overflows.
@@ -920,7 +942,6 @@ bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
OtherLocalAdjust = Cost.LocalCost - LocalCost;
else
ThisLocalAdjust = LocalCost - Cost.LocalCost;
-
} else {
ThisLocalAdjust = LocalCost;
OtherLocalAdjust = Cost.LocalCost;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 398066bf8903..8c43c9f3f884 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -20,11 +20,14 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
+// Reserve an address that indicates a value that is known to be "undef".
+static VNInfo UndefVNI(0xbad, SlotIndex());
+
void LiveRangeCalc::resetLiveOutMap() {
unsigned NumBlocks = MF->getNumBlockIDs();
Seen.clear();
Seen.resize(NumBlocks);
- EntryInfoMap.clear();
+ EntryInfos.clear();
Map.resize(NumBlocks);
}
@@ -283,8 +286,11 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
// Determine if the exit from the block is reached by some def.
unsigned N = WorkList[i];
MachineBasicBlock &B = *MF->getBlockNumbered(N);
- if (Seen[N] && Map[&B].first != nullptr)
- return MarkDefined(B);
+ if (Seen[N]) {
+ const LiveOutPair &LOB = Map[&B];
+ if (LOB.first != nullptr && LOB.first != &UndefVNI)
+ return MarkDefined(B);
+ }
SlotIndex Begin, End;
std::tie(Begin, End) = Indexes->getMBBRange(&B);
// Treat End as not belonging to B.
@@ -365,10 +371,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
#endif
FoundUndef |= MBB->pred_empty();
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
-
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
// Is this a known live-out block?
if (Seen.test(Pred->getNumber())) {
if (VNInfo *VNI = Map[Pred].first) {
@@ -387,7 +390,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
auto EP = LR.extendInBlock(Undefs, Start, End);
VNInfo *VNI = EP.first;
FoundUndef |= EP.second;
- setLiveOutValue(Pred, VNI);
+ setLiveOutValue(Pred, EP.second ? &UndefVNI : VNI);
if (VNI) {
if (TheVNI && TheVNI != VNI)
UniqueVNI = false;
@@ -406,7 +409,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
LiveIn.clear();
- FoundUndef |= (TheVNI == nullptr);
+ FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI);
if (Undefs.size() > 0 && FoundUndef)
UniqueVNI = false;
@@ -417,7 +420,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
// If a unique reaching def was found, blit in the live ranges immediately.
if (UniqueVNI) {
- assert(TheVNI != nullptr);
+ assert(TheVNI != nullptr && TheVNI != &UndefVNI);
LiveRangeUpdater Updater(&LR);
for (unsigned BN : WorkList) {
SlotIndex Start, End;
@@ -433,22 +436,26 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
// Prepare the defined/undefined bit vectors.
- auto EF = EntryInfoMap.find(&LR);
- if (EF == EntryInfoMap.end()) {
+ EntryInfoMap::iterator Entry;
+ bool DidInsert;
+ std::tie(Entry, DidInsert) = EntryInfos.insert(
+ std::make_pair(&LR, std::make_pair(BitVector(), BitVector())));
+ if (DidInsert) {
+ // Initialize newly inserted entries.
unsigned N = MF->getNumBlockIDs();
- EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first;
- EF->second.first.resize(N);
- EF->second.second.resize(N);
+ Entry->second.first.resize(N);
+ Entry->second.second.resize(N);
}
- BitVector &DefOnEntry = EF->second.first;
- BitVector &UndefOnEntry = EF->second.second;
+ BitVector &DefOnEntry = Entry->second.first;
+ BitVector &UndefOnEntry = Entry->second.second;
// Multiple values were found, so transfer the work list to the LiveIn array
// where UpdateSSA will use it as a work list.
LiveIn.reserve(WorkList.size());
for (unsigned BN : WorkList) {
MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
- if (Undefs.size() > 0 && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
+ if (Undefs.size() > 0 &&
+ !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
continue;
addLiveInBlock(LR, DomTree->getNode(MBB));
if (MBB == &UseMBB)
@@ -466,9 +473,9 @@ void LiveRangeCalc::updateSSA() {
assert(DomTree && "Missing dominator tree");
   // Iterate until convergence.
- unsigned Changes;
+ bool Changed;
do {
- Changes = 0;
+ Changed = false;
// Propagate live-out values down the dominator tree, inserting phi-defs
// when necessary.
for (LiveInBlock &I : LiveIn) {
@@ -491,15 +498,20 @@ void LiveRangeCalc::updateSSA() {
IDomValue = Map[IDom->getBlock()];
// Cache the DomTree node that defined the value.
- if (IDomValue.first && !IDomValue.second)
+ if (IDomValue.first && IDomValue.first != &UndefVNI &&
+ !IDomValue.second) {
Map[IDom->getBlock()].second = IDomValue.second =
DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+ }
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- LiveOutPair &Value = Map[*PI];
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ LiveOutPair &Value = Map[Pred];
if (!Value.first || Value.first == IDomValue.first)
continue;
+ if (Value.first == &UndefVNI) {
+ needPHI = true;
+ break;
+ }
// Cache the DomTree node that defined the value.
if (!Value.second)
@@ -523,7 +535,7 @@ void LiveRangeCalc::updateSSA() {
// Create a phi-def if required.
if (needPHI) {
- ++Changes;
+ Changed = true;
assert(Alloc && "Need VNInfo allocator to create PHI-defs");
SlotIndex Start, End;
std::tie(Start, End) = Indexes->getMBBRange(MBB);
@@ -542,7 +554,7 @@ void LiveRangeCalc::updateSSA() {
LR.addSegment(LiveInterval::Segment(Start, End, VNI));
LOP = LiveOutPair(VNI, Node);
}
- } else if (IDomValue.first) {
+ } else if (IDomValue.first && IDomValue.first != &UndefVNI) {
// No phi-def here. Remember incoming value.
I.Value = IDomValue.first;
@@ -554,9 +566,9 @@ void LiveRangeCalc::updateSSA() {
// MBB is live-out and doesn't define its own value.
if (LOP.first == IDomValue.first)
continue;
- ++Changes;
+ Changed = true;
LOP = IDomValue;
}
}
- } while (Changes);
+ } while (Changed);
}
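UndefVNI is never inserted into any live range; only its address matters, giving the live-out map a third state ("computed, known undef") next to nullptr ("not computed yet") and a real value. The sentinel-address pattern in isolation, independent of the LiveRangeCalc types:

    // Minimal illustration of the three-state convention used above.
    struct Value { int Id; };

    static Value UndefSentinel{-1}; // the address is the marker, contents unused

    static const char *describe(const Value *LiveOut) {
      if (!LiveOut)
        return "unknown";              // nothing computed for this block yet
      if (LiveOut == &UndefSentinel)
        return "undef";                // computed, known to be undefined
      return "defined";                // computed, a real defining value
    }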
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 1a7598f8044a..d41b782d9bdf 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -24,6 +24,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -65,7 +66,8 @@ class LiveRangeCalc {
/// registers do not overlap), but the defined/undefined information must
/// be kept separate for each individual range.
/// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
- std::map<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+ typedef DenseMap<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+ EntryInfoMap EntryInfos;
/// Map each basic block where a live range is live out to the live-out value
/// and its defining block.
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index f58d1f8b83ae..c58d192284dd 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -579,12 +579,12 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
//
// is equivalent to
// liveins: %edi, %esi
- bool ExplicitSuccesors = false;
+ bool ExplicitSuccessors = false;
while (true) {
if (Token.is(MIToken::kw_successors)) {
if (parseBasicBlockSuccessors(MBB))
return true;
- ExplicitSuccesors = true;
+ ExplicitSuccessors = true;
} else if (Token.is(MIToken::kw_liveins)) {
if (parseBasicBlockLiveins(MBB))
return true;
@@ -636,7 +636,7 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
}
// Construct successor list by searching for basic block machine operands.
- if (!ExplicitSuccesors) {
+ if (!ExplicitSuccessors) {
SmallVector<MachineBasicBlock*,4> Successors;
bool IsFallthrough;
guessSuccessors(MBB, Successors, IsFallthrough);
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 6b6b5f2814a9..73c3428a6e53 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -52,6 +52,14 @@ void MachineOptimizationRemarkEmitter::emit(
computeHotness(OptDiag);
LLVMContext &Ctx = MF.getFunction()->getContext();
+
+ // If a diagnostic has a hotness value, then only emit it if its hotness
+ // meets the threshold.
+ if (OptDiag.getHotness() &&
+ *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) {
+ return;
+ }
+
yaml::Output *Out = Ctx.getDiagnosticsOutputFile();
if (Out) {
auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon);
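This mirrors the IR-level remark emitter: a machine remark carrying a hotness value below the context's threshold is dropped before it reaches the YAML stream, while remarks with no hotness attached are unaffected. A sketch of how a driver might configure this, assuming the setter names that accompany the renamed getters used here (spellings vary slightly between releases):

    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    void configureHotRemarksOnly(LLVMContext &Ctx) {
      Ctx.setDiagnosticsHotnessRequested(true);   // attach hotness to remarks
      Ctx.setDiagnosticsHotnessThreshold(1000);   // drop remarks colder than this
    }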
@@ -73,7 +81,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
MachineFunction &MF) {
MachineBlockFrequencyInfo *MBFI;
- if (MF.getFunction()->getContext().getDiagnosticHotnessRequested())
+ if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested())
MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
else
MBFI = nullptr;
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 45ea0e4c39ab..5e279b065bbd 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -1,4 +1,4 @@
-//===- MacroFusion.cpp - Macro Fusion ----------------------===//
+//===- MacroFusion.cpp - Macro Fusion -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#define DEBUG_TYPE "misched"
@@ -26,8 +33,6 @@ using namespace llvm;
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-namespace {
-
static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SUnit &SecondSU) {
// Create a single weak edge between the adjacent instrs. The only effect is
@@ -66,6 +71,7 @@ static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
++NumFused;
}
+namespace {
/// \brief Post-process the DAG to create cluster edges between instrs that may
/// be fused by the processor into a single operation.
@@ -81,6 +87,8 @@ public:
void apply(ScheduleDAGInstrs *DAGInstrs) override;
};
+} // end anonymous namespace
+
void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
@@ -128,23 +136,18 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
return false;
}
-} // end anonymous namespace
-
-
-namespace llvm {
-
std::unique_ptr<ScheduleDAGMutation>
-createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+llvm::createMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
return nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
-createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+llvm::createBranchMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
return nullptr;
}
-
-} // end namespace llvm
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index da8fac6d3834..b13f6b68c420 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -76,6 +76,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -119,6 +120,14 @@ static cl::opt<unsigned> RewritePHILimit(
"rewrite-phi-limit", cl::Hidden, cl::init(10),
cl::desc("Limit the length of PHI chains to lookup"));
+// Limit the length of the recurrence chain when evaluating the benefit of
+// commuting operands.
+static cl::opt<unsigned> MaxRecurrenceChain(
+ "recurrence-chain-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum length of recurrence chain when evaluating the benefit "
+ "of commuting operands"));
+
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
@@ -131,12 +140,14 @@ STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
class ValueTrackerResult;
+ class RecurrenceInstr;
class PeepholeOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *MLI;
public:
static char ID; // Pass identification
@@ -150,6 +161,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
if (Aggressive) {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -160,6 +173,9 @@ namespace {
typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
RewriteMapTy;
+ /// \brief Sequence of instructions that form a recurrence cycle.
+ typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle;
+
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@@ -170,6 +186,7 @@ namespace {
bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool optimizeRecurrence(MachineInstr &PHI);
bool findNextSource(unsigned Reg, unsigned SubReg,
RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr *MI,
@@ -178,6 +195,13 @@ namespace {
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ /// \brief Finds recurrence cycles, but only ones that are formulated around
+ /// a def operand and a use operand that are tied. If there is a use
+ /// operand commutable with the tied use operand, the recurrence cycle is
+ /// followed along that operand as well.
+ bool findTargetRecurrence(unsigned Reg,
+ const SmallSet<unsigned, 2> &TargetReg,
+ RecurrenceCycle &RC);
/// \brief If copy instruction \p MI is a virtual register copy, track it in
/// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
@@ -222,6 +246,28 @@ namespace {
}
};
+ /// \brief Helper class to hold instructions that are inside recurrence
+ /// cycles. The recurrence cycle is formulated around 1) a def operand and its
+ /// tied use operand, or 2) a def operand and a use operand that is commutable
+ /// with another use operand which is tied to the def operand. In the latter
+ /// case, the indices of the tied use operand and the commutable use operand
+ /// are maintained with CommutePair.
+ class RecurrenceInstr {
+ public:
+ typedef std::pair<unsigned, unsigned> IndexPair;
+
+ RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
+ RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
+ : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
+
+ MachineInstr *getMI() const { return MI; }
+ Optional<IndexPair> getCommutePair() const { return CommutePair; }
+
+ private:
+ MachineInstr *MI;
+ Optional<IndexPair> CommutePair;
+ };
+
/// \brief Helper class to hold a reply for ValueTracker queries. Contains the
/// returned sources for a given search and the instructions where the sources
/// were tracked from.
@@ -412,6 +458,7 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
@@ -1487,6 +1534,113 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
+/// \brief Returns true if \p MO is a virtual register operand.
+static bool isVirtualRegisterOperand(MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+ return TargetRegisterInfo::isVirtualRegister(MO.getReg());
+}
+
+bool PeepholeOptimizer::findTargetRecurrence(
+ unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
+ RecurrenceCycle &RC) {
+ // Recurrence found if Reg is in TargetRegs.
+ if (TargetRegs.count(Reg))
+ return true;
+
+ // TODO: Currently, we only allow the last instruction of the recurrence
+ // cycle (the instruction that feeds the PHI instruction) to have more than
+ // one use, to guarantee that commuting operands does not tie registers
+ // with overlapping live ranges. Once we have actual live range info for
+ // each register, this constraint can be relaxed.
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ // Give up if the recurrence chain length exceeds the limit.
+ if (RC.size() >= MaxRecurrenceChain)
+ return false;
+
+ MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
+ unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
+
+ // Only interested in recurrences whose instructions have only one def, which
+ // is a virtual register.
+ if (MI.getDesc().getNumDefs() != 1)
+ return false;
+
+ MachineOperand &DefOp = MI.getOperand(0);
+ if (!isVirtualRegisterOperand(DefOp))
+ return false;
+
+ // Check if the def operand of MI is tied to any use operand. We are only
+ // interested in the case where all the instructions in the recurrence chain
+ // have their def operand tied to one of their use operands.
+ unsigned TiedUseIdx;
+ if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
+ return false;
+
+ if (Idx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ } else {
+ // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
+ unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ }
+ }
+
+ return false;
+}
+
+/// \brief Phi instructions will eventually be lowered to copy instructions. If
+/// the phi is in a loop header, a recurrence may be formulated around the
+/// source and destination of the phi. In such cases, commuting operands of the
+/// instructions in the recurrence may enable coalescing of the copy instruction
+/// generated from the phi. For example, if there is a recurrence of
+///
+/// LoopHeader:
+/// %vreg1 = phi(%vreg0, %vreg100)
+/// LoopLatch:
+/// %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1
+///
+/// then the fact that %vreg0 and %vreg2 are in the same tied-operand set makes
+/// the coalescing of the copy instruction generated from the phi in
+/// LoopHeader (i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and
+/// %vreg2 have overlapping live ranges. This introduces an additional move
+/// instruction into the final assembly. However, if we commute %vreg2 and
+/// %vreg1 of the ADD instruction, the redundant move instruction can be
+/// avoided.
+bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
+ SmallSet<unsigned, 2> TargetRegs;
+ for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
+ MachineOperand &MO = PHI.getOperand(Idx);
+ assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
+ TargetRegs.insert(MO.getReg());
+ }
+
+ bool Changed = false;
+ RecurrenceCycle RC;
+ if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
+ // Commutes operands of instructions in RC if necessary so that the copy to
+ // be generated from PHI can be coalesced.
+ DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
+ for (auto &RI : RC) {
+ DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
+ auto CP = RI.getCommutePair();
+ if (CP) {
+ Changed = true;
+ TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
+ (*CP).second);
+ DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
+ }
+ }
+ }
+
+ return Changed;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
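For context, a minimal source-level sketch of the kind of loop-carried recurrence the new optimizeRecurrence() targets (illustrative only; whether the commute actually fires depends on the target's tied-operand constraints):

    // After lowering, the accumulator becomes a PHI in the loop header, and the
    // add feeding it is the tied-operand instruction whose commutable use
    // operands the pass may swap so the PHI's copy can be coalesced.
    int dotProduct(const int *A, const int *B, int N) {
      int Sum = 0;
      for (int I = 0; I < N; ++I)
        Sum = A[I] * B[I] + Sum;
      return Sum;
    }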
@@ -1501,6 +1655,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
+ MLI = &getAnalysis<MachineLoopInfo>();
bool Changed = false;
@@ -1529,6 +1684,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 4> CopySrcRegs;
DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+ bool IsLoopHeader = MLI->isLoopHeader(&MBB);
+
for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
MII != MIE; ) {
MachineInstr *MI = &*MII;
@@ -1540,9 +1697,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
- if (MI->isPosition() || MI->isPHI())
+ if (MI->isPosition())
continue;
+ if (IsLoopHeader && MI->isPHI()) {
+ if (optimizeRecurrence(*MI)) {
+ Changed = true;
+ continue;
+ }
+ }
+
if (!MI->isCopy()) {
for (const auto &Op : MI->operands()) {
// Visit all operands: definitions can be implicit or explicit.
@@ -1667,7 +1831,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MRI->markUsesInDebugValueAsUndef(FoldedReg);
FoldAsLoadDefCandidates.erase(FoldedReg);
++NumLoadFold;
-
+
// MI is replaced with FoldMI so we can continue trying to fold
Changed = true;
MI = FoldMI;
@@ -1675,7 +1839,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
}
}
}
-
+
// If we run into an instruction we can't fold across, discard
// the load candidates. Note: We might be able to fold *into* this
// instruction, so this needs to be after the folding logic.
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 50d241bff23d..9562652556ac 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -2622,7 +2622,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
}
// If we couldn't allocate a register from spilling, there is probably some
- // invalid inline assembly. The base class wil report it.
+ // invalid inline assembly. The base class will report it.
if (Stage >= RS_Done || !VirtReg.isSpillable())
return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
Depth);
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 7b3a5d5c5ff7..ff9bca092dbe 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -979,6 +979,11 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
for (LiveInterval::SubRange &SR : IntB.subranges())
SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+
+ // If the newly created instruction has the address of an instruction that was
+ // deleted before (the object was recycled by the allocator), it needs to be
+ // removed from the deleted list.
+ ErasedInstrs.erase(NewCopyMI);
} else {
DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
<< MBB.getNumber() << '\t' << CopyMI);
@@ -989,6 +994,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
// While updating the live-ranges, we only look at slot indices and
// never go back to the instruction.
LIS->RemoveMachineInstrFromMaps(CopyMI);
+ // Mark instructions as deleted.
+ ErasedInstrs.insert(&CopyMI);
CopyMI.eraseFromParent();
// Update the liveness.
@@ -3095,7 +3102,7 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.erase(CurrList[i])) {
+ if (ErasedInstrs.count(CurrList[i])) {
CurrList[i] = nullptr;
continue;
}
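Both RegisterCoalescer hunks above address the same hazard: ErasedInstrs is keyed by instruction address, and MachineInstr objects are recycled by the allocator, so a newly created instruction can reuse the address of one erased earlier. A self-contained sketch of the pattern, with plain new/delete standing in for the MachineFunction allocator and illustrative names:

    #include <cassert>
    #include <set>

    struct Instr { int Opcode; };

    int main() {
      std::set<Instr *> Erased;

      Instr *Old = new Instr{1};
      Erased.insert(Old);        // mark as deleted, as the coalescer does
      delete Old;                // memory returns to the allocator

      Instr *New = new Instr{2}; // may land at Old's recycled address
      Erased.erase(New);         // drop any stale entry so New is not later
                                 // treated as "already erased"
      assert(!Erased.count(New));
      return 0;
    }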
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index d2eff950d861..bd5ecbd28f29 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -243,10 +243,14 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
unsigned VReg = Intervals[ID]->reg;
MO.setReg(VReg);
- if (MO.isTied()) {
+
+ if (MO.isTied() && Reg != VReg) {
/// Undef use operands are not tracked in the equivalence class but need
/// to be updated if they are tied.
MO.getParent()->substituteRegister(Reg, VReg, 0, TRI);
+
+ // substituteRegister breaks the iterator, so restart.
+ I = MRI->reg_nodbg_begin(Reg);
}
}
// TODO: We could attempt to recompute new register classes while visiting
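The RenameIndependentSubregs fix above is a classic iterator-invalidation repair: substituteRegister() moves the operand onto a different register's use list, so the use-list iterator being walked becomes invalid and the scan restarts. A generic, self-contained sketch of the same restart pattern (std::multimap stands in for the register use lists; names are illustrative):

    #include <map>

    int main() {
      std::multimap<int, int> Uses = {{1, 10}, {1, 11}, {1, 12}};
      // Move every use of key 1 over to key 2. Each mutation invalidates the
      // iterator we hold, so restart the lookup after every step.
      for (auto I = Uses.find(1); I != Uses.end(); I = Uses.find(1)) {
        int Val = I->second;
        Uses.erase(I);
        Uses.insert({2, Val});
      }
      return 0;
    }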
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 7dd66d799be4..0f70b0e9ca07 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1089,7 +1089,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
// Things that are available after the instruction are killed by it.
bool IsKill = LiveRegs.available(MRI, Reg);
MO.setIsKill(IsKill);
- if (IsKill && addToLiveRegs)
+ if (addToLiveRegs)
LiveRegs.addReg(Reg);
}
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d02dcb6f4439..d901af727686 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4915,7 +4915,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Loads must share the same base address
- BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
@@ -8210,18 +8210,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
- if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t Amt = CAmt->getZExtValue();
- unsigned Size = VT.getScalarSizeInBits();
-
- if (Amt < Size) {
- SDLoc SL(N);
- EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Amt = N0.getOperand(1);
+ KnownBits Known;
+ DAG.computeKnownBits(Amt, Known);
+ unsigned Size = VT.getScalarSizeInBits();
+ if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ SDLoc SL(N);
+ EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
- return DAG.getNode(ISD::SHL, SL, VT, Trunc,
- DAG.getConstant(Amt, SL, AmtVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ if (AmtVT != Amt.getValueType()) {
+ Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
+ AddToWorklist(Amt.getNode());
}
+ return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
}
}
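To make the new guard concrete (assuming the usual power-of-two scalar size): for a 32-bit result, Size = 32 and Log2_32(Size) = 5, so the combine now fires whenever computeKnownBits proves that only the low 5 bits of the shift amount can be nonzero, i.e. Amt <= 31 < Size. That is the same safety condition as the old constant-only "Amt < Size" check, extended to non-constant shift amounts whose range KnownBits can bound.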
@@ -9751,6 +9753,52 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
}
+ // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
+ // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
+ if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
+ (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
+ TLI.isOperationLegal(ISD::FABS, VT)) {
+ SDValue Select = N0, X = N1;
+ if (Select.getOpcode() != ISD::SELECT)
+ std::swap(Select, X);
+
+ SDValue Cond = Select.getOperand(0);
+ auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
+ auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
+
+ if (TrueOpnd && FalseOpnd &&
+ Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
+ isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
+ cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ std::swap(TrueOpnd, FalseOpnd);
+ // Fall through
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
+ TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ DAG.getNode(ISD::FABS, DL, VT, X));
+ if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
+ return DAG.getNode(ISD::FABS, DL, VT, X);
+
+ break;
+ }
+ }
+ }
+
// FMUL -> FMA combines:
if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
AddToWorklist(Fused.getNode());
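A scalar model of the new fmul/select fold is a useful sanity check (it relies on the no-NaNs and no-signed-zeros flags that guard the combine; the helper names are illustrative):

    #include <cmath>

    // Matches: fmul X, (select (fcmp X > 0.0), -1.0, 1.0)
    float before(float X) { return X * (X > 0.0f ? -1.0f : 1.0f); }

    // Folded form: fneg (fabs X). For X > 0 both give -X; for X <= 0 both give X.
    float after(float X) { return -std::fabs(X); }

    // The (1.0, -1.0) variant folds to plain fabs(X) by the same case analysis.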
@@ -12394,7 +12442,7 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
EVT MemVT = St->getMemoryVT();
// We must have a base and an offset.
@@ -12414,8 +12462,8 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset LBasePtr;
// Match on loadbaseptr if relevant.
if (IsLoadSrc)
- LBasePtr =
- BaseIndexOffset::match(cast<LoadSDNode>(St->getValue())->getBasePtr());
+ LBasePtr = BaseIndexOffset::match(
+ cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
@@ -12429,7 +12477,7 @@ void DAGCombiner::getStoreMergeCandidates(
if (IsLoadSrc) {
// The Load's Base Ptr must also match
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
- auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr());
+ auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
@@ -12443,7 +12491,7 @@ void DAGCombiner::getStoreMergeCandidates(
if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
return false;
- Ptr = BaseIndexOffset::match(Other->getBasePtr());
+ Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
// We are looking for a root node which is an ancestor to all mergeable
@@ -12786,7 +12834,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->getMemoryVT() != MemVT)
break;
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
// If this is not the first ptr that we check.
int64_t LdOffset = 0;
if (LdBasePtr.getBase().getNode()) {
@@ -12829,6 +12877,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// This variable refers to the size and not index in the array.
unsigned LastLegalVectorType = 1;
unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
bool DoIntegerTruncate = false;
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
@@ -12841,6 +12890,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
+
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
+
// Find a legal type for the vector store.
EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
bool IsFastSt, IsFastLd;
@@ -12926,11 +12979,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
+ MachineMemOperand::Flags MMOFlags = isDereferenceable ?
+ MachineMemOperand::MODereferenceable:
+ MachineMemOperand::MONone;
+
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoadAlign);
+ FirstLoad->getPointerInfo(), FirstLoadAlign,
+ MMOFlags);
NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstStoreAlign);
@@ -12940,7 +12998,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
NewLoad =
DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- JointMemOpVT, FirstLoadAlign);
+ JointMemOpVT, FirstLoadAlign, MMOFlags);
NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), JointMemOpVT,
@@ -15013,6 +15071,11 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+ unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
+
+ if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
+ return SDValue();
+ unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
// (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
// (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
@@ -15034,11 +15097,10 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
if (EltSizeInBits != ExtSrcSizeInBits)
return SDValue();
- // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
- // power-of-2 truncations as they are the most likely.
- for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
- if (isTruncate(Scale))
- return DAG.getBitcast(VT, N00);
+ // We can remove *extend_vector_inreg only if the truncation happens at
+ // the same scale as the extension.
+ if (isTruncate(ExtScale))
+ return DAG.getBitcast(VT, N00);
return SDValue();
}
@@ -16540,8 +16602,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
// Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr());
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr());
+ BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
int64_t PtrDiff;
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
@@ -16751,7 +16813,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -16777,7 +16839,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
// Check that the base pointer is the same as the original one.
if (!BasePtr.equalBaseIndex(Ptr, DAG))
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 75fec7bd1d48..ac3247948169 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1827,11 +1827,10 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::UADDO : ISD::USUBO,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
- TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
-
if (hasOVF) {
EVT OvfVT = getSetCCResultType(NVT);
SDVTList VTList = DAG.getVTList(NVT, OvfVT);
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
int RevOpc;
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
@@ -1864,13 +1863,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
-
- if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
- SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
- return;
- }
-
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
@@ -1885,14 +1877,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
-
- SDValue Borrow;
- if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
- Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
- else
- Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
- DAG.getConstant(0, dl, NVT));
-
+ SDValue Borrow = DAG.getSelect(dl, NVT, Cmp,
+ DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index d2e0dbbf88ec..4e899ae6668e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -18,28 +19,41 @@ namespace llvm {
bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
const SelectionDAG &DAG, int64_t &Off) {
- // Obvious equivalent
+ // Initial Offset difference.
Off = Other.Offset - Offset;
- if (Other.Base == Base && Other.Index == Index &&
- Other.IsIndexSignExt == IsIndexSignExt)
- return true;
- // Match GlobalAddresses
- if (Index == Other.Index)
- if (GlobalAddressSDNode *A = dyn_cast<GlobalAddressSDNode>(Base))
- if (GlobalAddressSDNode *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+ if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
+ // Trivial match.
+ if (Other.Base == Base)
+ return true;
+
+ // Match GlobalAddresses
+ if (auto *A = dyn_cast<GlobalAddressSDNode>(Base))
+ if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
if (A->getGlobal() == B->getGlobal()) {
Off += B->getOffset() - A->getOffset();
return true;
}
- // TODO: we should be able to add FrameIndex analysis improvements here.
+ const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ // Match non-equal FrameIndexes - a FrameIndex stemming from an
+ // alloca will not have its ObjectOffset set until post-DAG, and
+ // as such we must assume the two FrameIndexes are incomparable.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base))
+ if (!MFI.getObjectAllocation(A->getIndex()) &&
+ !MFI.getObjectAllocation(B->getIndex())) {
+ Off += MFI.getObjectOffset(B->getIndex()) -
+ MFI.getObjectOffset(A->getIndex());
+ return true;
+ }
+ }
return false;
}
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(SDValue Ptr) {
+BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
// (((B + I*M) + c)) + c ...
SDValue Base = Ptr;
SDValue Index = SDValue();
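Taken together, the SelectionDAGAddressAnalysis changes mean callers now pass the DAG into match() so that equalBaseIndex() can consult MachineFrameInfo. A short usage sketch mirroring the isAlias() call sites updated earlier in this patch (St0 and St1 stand for any two memory nodes; DAG is the enclosing SelectionDAG):

    BaseIndexOffset Ptr0 = BaseIndexOffset::match(St0->getBasePtr(), DAG);
    BaseIndexOffset Ptr1 = BaseIndexOffset::match(St1->getBasePtr(), DAG);
    int64_t Off;
    if (Ptr0.equalBaseIndex(Ptr1, DAG, Off)) {
      // Same base and index. Off now also accounts for same-global bases and
      // for non-alloca FrameIndex objects whose offsets are already known.
    }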
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f9f431db55be..acf68fbbdedf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3375,7 +3375,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue IdxN = getValue(Idx);
if (!IdxN.getValueType().isVector() && VectorWidth) {
- MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
+ EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index b1918b19e1df..817e58ce59e1 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -1,4 +1,4 @@
-//===-- TargetPassConfig.cpp - Target independent code generation passes --===//
+//===- TargetPassConfig.cpp - Target independent code generation passes ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,29 +13,37 @@
//===---------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetPassConfig.h"
-
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterUsageInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include <cassert>
+#include <string>
using namespace llvm;
@@ -225,6 +233,7 @@ char TargetPassConfig::EarlyTailDuplicateID = 0;
char TargetPassConfig::PostRAMachineLICMID = 0;
namespace {
+
struct InsertedPass {
AnalysisID TargetPassID;
IdentifyingPassPtr InsertedPassID;
@@ -245,9 +254,11 @@ struct InsertedPass {
return NP;
}
};
-}
+
+} // end anonymous namespace
namespace llvm {
+
class PassConfigImpl {
public:
// List of passes explicitly substituted by this target. Normally this is
@@ -263,7 +274,8 @@ public:
/// is inserted after each instance of the first one.
SmallVector<InsertedPass, 4> InsertedPasses;
};
-} // namespace llvm
+
+} // end namespace llvm
// Out of line virtual method.
TargetPassConfig::~TargetPassConfig() {
@@ -273,11 +285,7 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
- : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false),
- AddingMachinePasses(false), TM(&TM), Impl(nullptr), Initialized(false),
- DisableVerify(false), EnableTailMerge(true),
- RequireCodeGenSCCOrder(false) {
-
+ : ImmutablePass(ID), PM(&pm), TM(&TM) {
Impl = new PassConfigImpl();
// Register all target independent codegen passes to activate their PassIDs,
@@ -325,7 +333,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetPassConfig::TargetPassConfig()
- : ImmutablePass(ID), PM(nullptr) {
+ : ImmutablePass(ID) {
report_fatal_error("Trying to construct TargetPassConfig without a target "
"machine. Scheduling a CodeGen pass without a target "
"triple set?");
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 552a89f76ca2..83c00e24d14f 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -68,6 +68,13 @@ EnableRescheduling("twoaddr-reschedule",
cl::desc("Coalesce copies by rescheduling (default=true)"),
cl::init(true), cl::Hidden);
+// Limit the number of dataflow edges to traverse when evaluating the benefit
+// of commuting operands.
+static cl::opt<unsigned> MaxDataFlowEdge(
+ "dataflow-edge-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum number of dataflow edges to traverse when evaluating "
+ "the benefit of commuting operands"));
+
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
MachineFunction *MF;
@@ -637,10 +644,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// To more generally minimize register copies, ideally the logic of two addr
// instruction pass should be integrated with register allocation pass where
// interference graph is available.
- if (isRevCopyChain(regC, regA, 3))
+ if (isRevCopyChain(regC, regA, MaxDataFlowEdge))
return true;
- if (isRevCopyChain(regB, regA, 3))
+ if (isRevCopyChain(regB, regA, MaxDataFlowEdge))
return false;
// Since there are no intervening uses for both registers, then commute
diff --git a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
index d058f4864975..e0c7ef58c304 100644
--- a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
@@ -29,10 +29,8 @@ static Error visitKnownRecord(CVSymbol &Record,
return Error::success();
}
-Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) {
- if (auto EC = Callbacks.visitSymbolBegin(Record))
- return EC;
-
+static Error finishVisitation(CVSymbol &Record,
+ SymbolVisitorCallbacks &Callbacks) {
switch (Record.Type) {
default:
if (auto EC = Callbacks.visitUnknownSymbol(Record))
@@ -55,6 +53,18 @@ Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) {
return Error::success();
}
+Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) {
+ if (auto EC = Callbacks.visitSymbolBegin(Record))
+ return EC;
+ return finishVisitation(Record, Callbacks);
+}
+
+Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record, uint32_t Offset) {
+ if (auto EC = Callbacks.visitSymbolBegin(Record, Offset))
+ return EC;
+ return finishVisitation(Record, Callbacks);
+}
+
Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols) {
for (auto I : Symbols) {
if (auto EC = visitSymbolRecord(I))
@@ -62,3 +72,13 @@ Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols) {
}
return Error::success();
}
+
+Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols,
+ uint32_t InitialOffset) {
+ for (auto I : Symbols) {
+ if (auto EC = visitSymbolRecord(I, InitialOffset))
+ return EC;
+ InitialOffset += I.length();
+ }
+ return Error::success();
+}
diff --git a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
index c31b8d1c96d5..ccc20eb74887 100644
--- a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugChecksumsSubsection.cpp ----------------------*- C++ -*-===//
+//===- DebugChecksumsSubsection.cpp ---------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,10 +8,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
using namespace llvm;
using namespace llvm::codeview;
@@ -25,7 +32,7 @@ struct FileChecksumEntryHeader {
// Checksum bytes follow.
};
-Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::
+Error VarStreamArrayExtractor<FileChecksumEntry>::
operator()(BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) {
BinaryStreamReader Reader(Stream);
@@ -48,6 +55,7 @@ Error DebugChecksumsSubsectionRef::initialize(BinaryStreamReader Reader) {
return Error::success();
}
+
Error DebugChecksumsSubsectionRef::initialize(BinaryStreamRef Section) {
BinaryStreamReader Reader(Section);
return initialize(Reader);
diff --git a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
index 21e2cc56075b..cef27787cfd1 100644
--- a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugCrossExSubsection.cpp -------------------------------*- C++ -*-===//
+//===- DebugCrossExSubsection.cpp -----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,8 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
-
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
diff --git a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
index 2c4a0b779342..88c0076915b5 100644
--- a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugCrossImpSubsection.cpp ------------------------------*- C++ -*-===//
+//===- DebugCrossImpSubsection.cpp ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,14 +8,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
-
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
-namespace llvm {
Error VarStreamArrayExtractor<CrossModuleImportItem>::
operator()(BinaryStreamRef Stream, uint32_t &Len,
codeview::CrossModuleImportItem &Item) {
@@ -34,7 +41,6 @@ operator()(BinaryStreamRef Stream, uint32_t &Len,
return EC;
return Error::success();
}
-}
Error DebugCrossModuleImportsSubsectionRef::initialize(
BinaryStreamReader Reader) {
diff --git a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
index e7719d05dbdc..077c103a615b 100644
--- a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugInlineeLinesSubsection.cpp ------------------------*- C++-*-===//
+//===- DebugInlineeLinesSubsection.cpp ------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,11 +8,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
-
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
diff --git a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
index fbcad61d60a6..57ad40819fbc 100644
--- a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugLinesSubsection.cpp -------------------------------*- C++-*-===//
+//===- DebugLinesSubsection.cpp -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,18 +8,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
-
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
Error LineColumnExtractor::operator()(BinaryStreamRef Stream, uint32_t &Len,
LineColumnEntry &Item) {
- using namespace codeview;
const LineBlockFragmentHeader *BlockHeader;
BinaryStreamReader Reader(Stream);
if (auto EC = Reader.readObject(BlockHeader))
diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index de02525270c4..d723282eb715 100644
--- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugStringTableSubsection.cpp - CodeView String Table ---*- C++ -*-===//
+//===- DebugStringTableSubsection.cpp - CodeView String Table -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,10 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
-
-#include "llvm/Support/BinaryStream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
@@ -23,6 +27,7 @@ Error DebugStringTableSubsectionRef::initialize(BinaryStreamRef Contents) {
Stream = Contents;
return Error::success();
}
+
Error DebugStringTableSubsectionRef::initialize(BinaryStreamReader &Reader) {
return Reader.readStreamRef(Stream);
}
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index d69eca018e0c..55f343c11e7f 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -1,4 +1,4 @@
-//===- DebugSubsectionRecord.cpp -----------------------------*- C++-*-===//
+//===- DebugSubsectionRecord.cpp ------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,16 +8,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
-
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
-DebugSubsectionRecord::DebugSubsectionRecord()
- : Container(CodeViewContainer::ObjectFile),
- Kind(DebugSubsectionKind::None) {}
+DebugSubsectionRecord::DebugSubsectionRecord() = default;
DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind,
BinaryStreamRef Data,
diff --git a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
index 5f91b68f3ad8..60fbf9d747b2 100644
--- a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugSymbolRVASubsection.cpp ------------------------------*- C++-*-===//
+//===- DebugSymbolRVASubsection.cpp ---------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,6 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp
index ec00af28395e..4cfb55a31b35 100644
--- a/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -1,4 +1,4 @@
-//===- EnumTables.cpp - Enum to string conversion tables --------*- C++ -*-===//
+//===- EnumTables.cpp - Enum to string conversion tables ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include <type_traits>
using namespace llvm;
using namespace codeview;
@@ -333,6 +335,7 @@ static const EnumEntry<COFF::SectionCharacteristics>
namespace llvm {
namespace codeview {
+
ArrayRef<EnumEntry<SymbolKind>> getSymbolTypeNames() {
return makeArrayRef(SymbolTypeNames);
}
@@ -348,48 +351,63 @@ ArrayRef<EnumEntry<uint16_t>> getRegisterNames() {
ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames() {
return makeArrayRef(PublicSymFlagNames);
}
+
ArrayRef<EnumEntry<uint8_t>> getProcSymFlagNames() {
return makeArrayRef(ProcSymFlagNames);
}
+
ArrayRef<EnumEntry<uint16_t>> getLocalFlagNames() {
return makeArrayRef(LocalFlags);
}
+
ArrayRef<EnumEntry<uint8_t>> getFrameCookieKindNames() {
return makeArrayRef(FrameCookieKinds);
}
+
ArrayRef<EnumEntry<SourceLanguage>> getSourceLanguageNames() {
return makeArrayRef(SourceLanguages);
}
+
ArrayRef<EnumEntry<uint32_t>> getCompileSym2FlagNames() {
return makeArrayRef(CompileSym2FlagNames);
}
+
ArrayRef<EnumEntry<uint32_t>> getCompileSym3FlagNames() {
return makeArrayRef(CompileSym3FlagNames);
}
+
ArrayRef<EnumEntry<uint32_t>> getFileChecksumNames() {
return makeArrayRef(FileChecksumNames);
}
+
ArrayRef<EnumEntry<unsigned>> getCPUTypeNames() {
return makeArrayRef(CPUTypeNames);
}
+
ArrayRef<EnumEntry<uint32_t>> getFrameProcSymFlagNames() {
return makeArrayRef(FrameProcSymFlagNames);
}
+
ArrayRef<EnumEntry<uint16_t>> getExportSymFlagNames() {
return makeArrayRef(ExportSymFlagNames);
}
+
ArrayRef<EnumEntry<uint32_t>> getModuleSubstreamKindNames() {
return makeArrayRef(ModuleSubstreamKindNames);
}
+
ArrayRef<EnumEntry<uint8_t>> getThunkOrdinalNames() {
return makeArrayRef(ThunkOrdinalNames);
}
+
ArrayRef<EnumEntry<uint16_t>> getTrampolineNames() {
return makeArrayRef(TrampolineNames);
}
+
ArrayRef<EnumEntry<COFF::SectionCharacteristics>>
getImageSectionCharacteristicNames() {
return makeArrayRef(ImageSectionCharacteristicNames);
}
-}
-}
+
+} // end namespace codeview
+} // end namespace llvm
diff --git a/lib/DebugInfo/CodeView/Formatters.cpp b/lib/DebugInfo/CodeView/Formatters.cpp
index ef00bd8570fa..1fa8d219d6ac 100644
--- a/lib/DebugInfo/CodeView/Formatters.cpp
+++ b/lib/DebugInfo/CodeView/Formatters.cpp
@@ -1,4 +1,4 @@
-//===- Formatters.cpp -------------------------------------------*- C++ -*-===//
+//===- Formatters.cpp -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,6 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/Formatters.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
using namespace llvm;
using namespace llvm::codeview;
@@ -19,7 +23,7 @@ GuidAdapter::GuidAdapter(StringRef Guid)
GuidAdapter::GuidAdapter(ArrayRef<uint8_t> Guid)
: FormatAdapter(std::move(Guid)) {}
-void GuidAdapter::format(llvm::raw_ostream &Stream, StringRef Style) {
+void GuidAdapter::format(raw_ostream &Stream, StringRef Style) {
static const char *Lookup = "0123456789ABCDEF";
assert(Item.size() == 16 && "Expected 16-byte GUID");
diff --git a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index 20f7e72c3af3..5aaf3f1453a8 100644
--- a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -1,4 +1,4 @@
-//===- LazyRandomTypeCollection.cpp ---------------------------- *- C++--*-===//
+//===- LazyRandomTypeCollection.cpp ---------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,12 +8,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
-
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/TypeName.h"
-#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
using namespace llvm::codeview;
diff --git a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
index 928bf8c94f73..306af1d1ef6b 100644
--- a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
+++ b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
@@ -1,4 +1,4 @@
-//===- StringsAndChecksums.cpp ----------------------------------*- C++ -*-===//
+//===- StringsAndChecksums.cpp --------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,14 +8,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
using namespace llvm;
using namespace llvm::codeview;
-StringsAndChecksumsRef::StringsAndChecksumsRef() {}
+StringsAndChecksumsRef::StringsAndChecksumsRef() = default;
StringsAndChecksumsRef::StringsAndChecksumsRef(
const DebugStringTableSubsectionRef &Strings)
diff --git a/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
index 9f2d619d1a1c..9a2e776feb75 100644
--- a/lib/DebugInfo/CodeView/SymbolSerializer.cpp
+++ b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
@@ -1,4 +1,4 @@
-//===- SymbolSerializer.cpp -------------------------------------*- C++ -*-===//
+//===- SymbolSerializer.cpp -----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,6 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
using namespace llvm;
using namespace llvm::codeview;
@@ -15,7 +22,7 @@ using namespace llvm::codeview;
SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator,
CodeViewContainer Container)
: Storage(Allocator), RecordBuffer(MaxRecordLength),
- Stream(RecordBuffer, llvm::support::little), Writer(Stream),
+ Stream(RecordBuffer, support::little), Writer(Stream),
Mapping(Writer, Container) {}
Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) {
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 1226d5be3f3c..72cb9e2e3544 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -438,6 +438,25 @@ void llvm::codeview::discoverTypeIndices(const CVType &Type,
::discoverTypeIndices(Type.content(), Type.kind(), Refs);
}
+void llvm::codeview::discoverTypeIndices(const CVType &Type,
+ SmallVectorImpl<TypeIndex> &Indices) {
+
+ Indices.clear();
+
+ SmallVector<TiReference, 4> Refs;
+ discoverTypeIndices(Type, Refs);
+ if (Refs.empty())
+ return;
+
+ BinaryStreamReader Reader(Type.content(), support::little);
+ for (const auto &Ref : Refs) {
+ Reader.setOffset(Ref.Offset);
+ FixedStreamArray<TypeIndex> Run;
+ cantFail(Reader.readArray(Run, Ref.Count));
+ Indices.append(Run.begin(), Run.end());
+ }
+}
+
void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
SmallVectorImpl<TiReference> &Refs) {
const RecordPrefix *P =
diff --git a/lib/DebugInfo/CodeView/TypeSerializer.cpp b/lib/DebugInfo/CodeView/TypeSerializer.cpp
index 93c1198e36ce..003c13b4a20d 100644
--- a/lib/DebugInfo/CodeView/TypeSerializer.cpp
+++ b/lib/DebugInfo/CodeView/TypeSerializer.cpp
@@ -1,4 +1,4 @@
-//===- TypeSerialzier.cpp ---------------------------------------*- C++ -*-===//
+//===- TypeSerializer.cpp -------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,16 +8,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
-
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
-
-#include <string.h>
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
using namespace llvm;
using namespace llvm::codeview;
namespace {
+
struct HashedType {
uint64_t Hash;
const uint8_t *Data;
@@ -30,20 +41,26 @@ struct HashedType {
struct HashedTypePtr {
HashedTypePtr() = default;
HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {}
+
HashedType *Ptr = nullptr;
};
-} // namespace
+
+} // end anonymous namespace
namespace llvm {
+
template <> struct DenseMapInfo<HashedTypePtr> {
static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); }
+
static inline HashedTypePtr getTombstoneKey() {
return HashedTypePtr(reinterpret_cast<HashedType *>(1));
}
+
static unsigned getHashValue(HashedTypePtr Val) {
assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr);
return Val.Ptr->Hash;
}
+
static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) {
HashedType *LHS = LHSP.Ptr;
HashedType *RHS = RHSP.Ptr;
@@ -54,7 +71,8 @@ template <> struct DenseMapInfo<HashedTypePtr> {
return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0;
}
};
-}
+
+} // end namespace llvm
/// Private implementation so that we don't leak our DenseMap instantiations to
/// users.
@@ -159,13 +177,13 @@ TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) {
TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage, bool Hash)
: RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2),
- Stream(RecordBuffer, llvm::support::little), Writer(Stream),
+ Stream(RecordBuffer, support::little), Writer(Stream),
Mapping(Writer) {
// RecordBuffer needs to be able to hold enough data so that if we are 1
// byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes,
// we won't overflow.
if (Hash)
- Hasher = make_unique<TypeHasher>(Storage);
+ Hasher = llvm::make_unique<TypeHasher>(Storage);
}
TypeSerializer::~TypeSerializer() = default;
@@ -331,7 +349,7 @@ Error TypeSerializer::visitMemberEnd(CVMemberRecord &Record) {
uint8_t *SegmentBytes = RecordStorage.Allocate<uint8_t>(LengthWithSize);
auto SavedSegment = MutableArrayRef<uint8_t>(SegmentBytes, LengthWithSize);
- MutableBinaryByteStream CS(SavedSegment, llvm::support::little);
+ MutableBinaryByteStream CS(SavedSegment, support::little);
BinaryStreamWriter CW(CS);
if (auto EC = CW.writeBytes(CopyData))
return EC;
diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt
index 6ca6e64bd8e6..11f94509e8fa 100644
--- a/lib/DebugInfo/DWARF/CMakeLists.txt
+++ b/lib/DebugInfo/DWARF/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMDebugInfoDWARF
DWARFAcceleratorTable.cpp
DWARFCompileUnit.cpp
DWARFContext.cpp
+ DWARFDataExtractor.cpp
DWARFDebugAbbrev.cpp
DWARFDebugArangeSet.cpp
DWARFDebugAranges.cpp
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 87009bf1b6a1..9ae7c9a07f76 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -62,6 +62,45 @@ uint32_t DWARFAcceleratorTable::getHeaderDataLength() {
return Hdr.HeaderDataLength;
}
+ArrayRef<std::pair<DWARFAcceleratorTable::HeaderData::AtomType,
+ DWARFAcceleratorTable::HeaderData::Form>>
+DWARFAcceleratorTable::getAtomsDesc() {
+ return HdrData.Atoms;
+}
+
+bool DWARFAcceleratorTable::validateForms() {
+ for (auto Atom : getAtomsDesc()) {
+ DWARFFormValue FormValue(Atom.second);
+ switch (Atom.first) {
+ case dwarf::DW_ATOM_die_offset:
+ if ((!FormValue.isFormClass(DWARFFormValue::FC_Constant) &&
+ !FormValue.isFormClass(DWARFFormValue::FC_Flag)) ||
+ FormValue.getForm() == dwarf::DW_FORM_sdata)
+ return false;
+ default:
+ break;
+ }
+ }
+ return true;
+}
+
+uint32_t DWARFAcceleratorTable::readAtoms(uint32_t &HashDataOffset) {
+ uint32_t DieOffset = dwarf::DW_INVALID_OFFSET;
+
+ for (auto Atom : getAtomsDesc()) {
+ DWARFFormValue FormValue(Atom.second);
+    FormValue.extractValue(AccelSection, &HashDataOffset, nullptr);
+ switch (Atom.first) {
+ case dwarf::DW_ATOM_die_offset:
+ DieOffset = *FormValue.getAsUnsignedConstant();
+ break;
+ default:
+ break;
+ }
+ }
+ return DieOffset;
+}
+
LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
// Dump the header.
OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n'
@@ -121,8 +160,7 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
continue;
}
while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) {
- unsigned StringOffset =
- getRelocatedValue(AccelSection, 4, &DataOffset, &Relocs);
+ unsigned StringOffset = AccelSection.getRelocatedValue(4, &DataOffset);
if (!StringOffset)
break;
OS << format(" Name: %08x \"%s\"\n", StringOffset,
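getAtomsDesc(), validateForms() and readAtoms() are the new public surface for walking an accelerator table's HashData; the DWARFVerifier change later in this patch is their first in-tree consumer. A compressed sketch of the intended use; the helper name is invented and the assumption that HashDataOffset already points at a HashData tuple is not part of the patch:

#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
using namespace llvm;

// Returns true if the DW_ATOM_die_offset stored at HashDataOffset refers to a
// DIE that actually exists in DCtx.
static bool hasValidDie(DWARFContext &DCtx, DWARFAcceleratorTable &Accel,
                        uint32_t HashDataOffset) {
  // Bail out if there are no atoms or the atom forms cannot be decoded.
  if (Accel.getAtomsDesc().empty() || !Accel.validateForms())
    return false;
  // readAtoms() advances HashDataOffset past one atom tuple and returns the
  // DW_ATOM_die_offset value it found.
  uint32_t DieOffset = Accel.readAtoms(HashDataOffset);
  return DCtx.getDIEForOffset(DieOffset).isValid();
}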
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 381479461750..a18d4efec07a 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -59,26 +59,13 @@ using DWARFLineTable = DWARFDebugLine::LineTable;
using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
-uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size,
- uint32_t *Off, const RelocAddrMap *Relocs,
- uint64_t *SectionIndex) {
- if (!Relocs)
- return Data.getUnsigned(Off, Size);
- RelocAddrMap::const_iterator AI = Relocs->find(*Off);
- if (AI == Relocs->end())
- return Data.getUnsigned(Off, Size);
- if (SectionIndex)
- *SectionIndex = AI->second.SectionIndex;
- return Data.getUnsigned(Off, Size) + AI->second.Value;
-}
-
static void dumpAccelSection(raw_ostream &OS, StringRef Name,
const DWARFSection& Section, StringRef StringSection,
bool LittleEndian) {
- DataExtractor AccelSection(Section.Data, LittleEndian, 0);
+ DWARFDataExtractor AccelSection(Section, LittleEndian, 0);
DataExtractor StrData(StringSection, LittleEndian, 0);
OS << "\n." << Name << " contents:\n";
- DWARFAcceleratorTable Accel(AccelSection, StrData, Section.Relocs);
+ DWARFAcceleratorTable Accel(AccelSection, StrData);
if (!Accel.extract())
return;
Accel.dump(OS);
@@ -88,7 +75,7 @@ static void
dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
const DWARFSection &StringOffsetsSection,
StringRef StringSection, bool LittleEndian) {
- DataExtractor StrOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
+ DWARFDataExtractor StrOffsetExt(StringOffsetsSection, LittleEndian, 0);
uint32_t Offset = 0;
uint64_t SectionSize = StringOffsetsSection.Data.size();
@@ -144,8 +131,8 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
while (Offset - ContributionBase < ContributionSize) {
OS << format("0x%8.8x: ", Offset);
// FIXME: We can only extract strings in DWARF32 format at the moment.
- uint64_t StringOffset = getRelocatedValue(
- StrOffsetExt, EntrySize, &Offset, &StringOffsetsSection.Relocs);
+ uint64_t StringOffset =
+ StrOffsetExt.getRelocatedValue(EntrySize, &Offset);
if (Format == DWARF32) {
OS << format("%8.8x ", StringOffset);
uint32_t StringOffset32 = (uint32_t)StringOffset;
@@ -287,11 +274,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
if (!CUDIE)
continue;
if (auto StmtOffset = toSectionOffset(CUDIE.find(DW_AT_stmt_list))) {
- DataExtractor lineData(getLineSection().Data, isLittleEndian(),
- savedAddressByteSize);
+ DWARFDataExtractor lineData(getLineSection(), isLittleEndian(),
+ savedAddressByteSize);
DWARFDebugLine::LineTable LineTable;
uint32_t Offset = *StmtOffset;
- LineTable.parse(lineData, &getLineSection().Relocs, &Offset);
+ LineTable.parse(lineData, &Offset);
LineTable.dump(OS);
}
}
@@ -310,8 +297,8 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
if (DumpType == DIDT_All || DumpType == DIDT_LineDwo) {
OS << "\n.debug_line.dwo contents:\n";
unsigned stmtOffset = 0;
- DataExtractor lineData(getLineDWOSection().Data, isLittleEndian(),
- savedAddressByteSize);
+ DWARFDataExtractor lineData(getLineDWOSection(), isLittleEndian(),
+ savedAddressByteSize);
DWARFDebugLine::LineTable LineTable;
while (LineTable.Prologue.parse(lineData, &stmtOffset)) {
LineTable.dump(OS);
@@ -348,11 +335,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
// sizes, but for simplicity we just use the address byte size of the last
// compile unit (there is no easy and fast way to associate address range
// list and the compile unit it describes).
- DataExtractor rangesData(getRangeSection().Data, isLittleEndian(),
- savedAddressByteSize);
+ DWARFDataExtractor rangesData(getRangeSection(), isLittleEndian(),
+ savedAddressByteSize);
offset = 0;
DWARFDebugRangeList rangeList;
- while (rangeList.extract(rangesData, &offset, getRangeSection().Relocs))
+ while (rangeList.extract(rangesData, &offset))
rangeList.dump(OS);
}
@@ -499,11 +486,13 @@ const DWARFDebugLoc *DWARFContext::getDebugLoc() {
if (Loc)
return Loc.get();
- DataExtractor LocData(getLocSection().Data, isLittleEndian(), 0);
- Loc.reset(new DWARFDebugLoc(getLocSection().Relocs));
+ Loc.reset(new DWARFDebugLoc);
// assume all compile units have the same address byte size
- if (getNumCompileUnits())
- Loc->parse(LocData, getCompileUnitAtIndex(0)->getAddressByteSize());
+ if (getNumCompileUnits()) {
+ DWARFDataExtractor LocData(getLocSection(), isLittleEndian(),
+ getCompileUnitAtIndex(0)->getAddressByteSize());
+ Loc->parse(LocData);
+ }
return Loc.get();
}
@@ -570,7 +559,7 @@ const DWARFDebugMacro *DWARFContext::getDebugMacro() {
const DWARFLineTable *
DWARFContext::getLineTableForUnit(DWARFUnit *U) {
if (!Line)
- Line.reset(new DWARFDebugLine(&getLineSection().Relocs));
+ Line.reset(new DWARFDebugLine);
auto UnitDIE = U->getUnitDIE();
if (!UnitDIE)
@@ -586,12 +575,12 @@ DWARFContext::getLineTableForUnit(DWARFUnit *U) {
return lt;
// Make sure the offset is good before we try to parse.
- if (stmtOffset >= U->getLineSection().size())
+ if (stmtOffset >= U->getLineSection().Data.size())
return nullptr;
// We have to parse it first.
- DataExtractor lineData(U->getLineSection(), isLittleEndian(),
- U->getAddressByteSize());
+ DWARFDataExtractor lineData(U->getLineSection(), isLittleEndian(),
+ U->getAddressByteSize());
return Line->getOrParseLineTable(lineData, stmtOffset);
}
@@ -870,13 +859,13 @@ static Expected<SymInfo> getSymbolInfo(const object::ObjectFile &Obj,
Expected<uint64_t> SymAddrOrErr = Sym->getAddress();
if (!SymAddrOrErr)
- return createError("error: failed to compute symbol address: ",
+ return createError("failed to compute symbol address: ",
SymAddrOrErr.takeError());
// Also remember what section this symbol is in for later
auto SectOrErr = Sym->getSection();
if (!SectOrErr)
- return createError("error: failed to get symbol section: ",
+ return createError("failed to get symbol section: ",
SectOrErr.takeError());
RSec = *SectOrErr;
@@ -937,8 +926,14 @@ Error DWARFContextInMemory::maybeDecompress(const SectionRef &Sec,
return Error::success();
}
-DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
- const LoadedObjectInfo *L)
+ErrorPolicy DWARFContextInMemory::defaultErrorHandler(Error E) {
+ errs() << "error: " + toString(std::move(E)) << '\n';
+ return ErrorPolicy::Continue;
+}
+
+DWARFContextInMemory::DWARFContextInMemory(
+ const object::ObjectFile &Obj, const LoadedObjectInfo *L,
+ function_ref<ErrorPolicy(Error)> HandleError)
: FileName(Obj.getFileName()), IsLittleEndian(Obj.isLittleEndian()),
AddressSize(Obj.getBytesInAddress()) {
for (const SectionRef &Section : Obj.sections()) {
@@ -961,9 +956,10 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
Section.getContents(data);
if (auto Err = maybeDecompress(Section, name, data)) {
- errs() << "error: failed to decompress '" + name + "', " +
- toString(std::move(Err))
- << '\n';
+ ErrorPolicy EP = HandleError(
+ createError("failed to decompress '" + name + "', ", std::move(Err)));
+ if (EP == ErrorPolicy::Halt)
+ return;
continue;
}
@@ -1055,7 +1051,8 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
Expected<SymInfo> SymInfoOrErr = getSymbolInfo(Obj, Reloc, L, AddrCache);
if (!SymInfoOrErr) {
- errs() << toString(SymInfoOrErr.takeError()) << '\n';
+ if (HandleError(SymInfoOrErr.takeError()) == ErrorPolicy::Halt)
+ return;
continue;
}
@@ -1064,7 +1061,11 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
if (V.error()) {
SmallString<32> Name;
Reloc.getTypeName(Name);
- errs() << "error: failed to compute relocation: " << Name << "\n";
+ ErrorPolicy EP = HandleError(
+ createError("failed to compute relocation: " + Name + ", ",
+ errorCodeToError(object_error::parse_failed)));
+ if (EP == ErrorPolicy::Halt)
+ return;
continue;
}
RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val};
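The new HandleError hook replaces the hard-coded errs() reporting above: defaultErrorHandler keeps the old print-and-continue behaviour, but a caller can now choose to halt instead. A minimal sketch of a caller-supplied policy; the helper name is invented and the three-argument constructor shape is assumed from the definition above, so treat this as an illustration rather than the library's documented API:

#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Build a DWARF context that stops parsing at the first section or relocation
// error instead of continuing like DWARFContextInMemory::defaultErrorHandler.
static std::unique_ptr<DWARFContextInMemory>
makeStrictContext(const object::ObjectFile &Obj) {
  auto HandleError = [](Error E) -> ErrorPolicy {
    errs() << "error: " << toString(std::move(E)) << '\n';
    return ErrorPolicy::Halt;
  };
  return llvm::make_unique<DWARFContextInMemory>(Obj, /*L=*/nullptr,
                                                 HandleError);
}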
diff --git a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
new file mode 100644
index 000000000000..001097e56c71
--- /dev/null
+++ b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
@@ -0,0 +1,24 @@
+//===- DWARFDataExtractor.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+
+using namespace llvm;
+
+uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off,
+ uint64_t *SecNdx) const {
+ if (!RelocMap)
+ return getUnsigned(Off, Size);
+ RelocAddrMap::const_iterator AI = RelocMap->find(*Off);
+ if (AI == RelocMap->end())
+ return getUnsigned(Off, Size);
+ if (SecNdx)
+ *SecNdx = AI->second.SectionIndex;
+ return getUnsigned(Off, Size) + AI->second.Value;
+}
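This small class is the pivot of the patch: instead of threading a RelocAddrMap into every read (the old free function getRelocatedValue deleted from DWARFContext.cpp above), callers build the extractor from a DWARFSection, which already carries its relocation map, and read through it. A minimal before/after sketch matching the call sites converted below; the helper name is invented and this is an illustration, not part of the patch:

#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
using namespace llvm;

static uint64_t readRelocated(const DWARFSection &Section, bool IsLittleEndian,
                              uint8_t AddrSize, uint32_t &Off) {
  // Before: getRelocatedValue(Data, AddrSize, &Off, &Section.Relocs);
  // After: the extractor remembers the section's relocation map itself, so
  // callers no longer pass a RelocAddrMap around.
  DWARFDataExtractor Data(Section, IsLittleEndian, AddrSize);
  return Data.getRelocatedValue(AddrSize, &Off);
}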
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 1551974b822a..976bc4651ae6 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -21,13 +21,13 @@ using namespace dwarf;
bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U,
uint32_t *OffsetPtr) {
- DataExtractor DebugInfoData = U.getDebugInfoExtractor();
+ DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor();
const uint32_t UEndOffset = U.getNextUnitOffset();
return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0);
}
bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
- const DataExtractor &DebugInfoData,
+ const DWARFDataExtractor &DebugInfoData,
uint32_t UEndOffset, uint32_t D) {
Offset = *OffsetPtr;
Depth = D;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index ad5647f3e03d..7d180564e9f7 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -94,8 +94,8 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
// Parse v2-v4 directory and file tables.
static void
-parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
- uint64_t EndPrologueOffset,
+parseV2DirFileTables(const DWARFDataExtractor &DebugLineData,
+ uint32_t *OffsetPtr, uint64_t EndPrologueOffset,
std::vector<StringRef> &IncludeDirectories,
std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
while (*OffsetPtr < EndPrologueOffset) {
@@ -122,7 +122,7 @@ parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
// Returns the descriptors, or an empty vector if we did not find a path or
// ran off the end of the prologue.
static ContentDescriptors
-parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr,
+parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
uint64_t EndPrologueOffset) {
ContentDescriptors Descriptors;
int FormatCount = DebugLineData.getU8(OffsetPtr);
@@ -142,8 +142,8 @@ parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr,
}
static bool
-parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
- uint64_t EndPrologueOffset,
+parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
+ uint32_t *OffsetPtr, uint64_t EndPrologueOffset,
const DWARFFormParams &FormParams,
std::vector<StringRef> &IncludeDirectories,
std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
@@ -212,7 +212,7 @@ parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
return true;
}
-bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData,
+bool DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
uint32_t *OffsetPtr) {
const uint64_t PrologueOffset = *OffsetPtr;
@@ -381,20 +381,19 @@ DWARFDebugLine::getLineTable(uint32_t Offset) const {
}
const DWARFDebugLine::LineTable *
-DWARFDebugLine::getOrParseLineTable(DataExtractor DebugLineData,
+DWARFDebugLine::getOrParseLineTable(const DWARFDataExtractor &DebugLineData,
uint32_t Offset) {
std::pair<LineTableIter, bool> Pos =
LineTableMap.insert(LineTableMapTy::value_type(Offset, LineTable()));
LineTable *LT = &Pos.first->second;
if (Pos.second) {
- if (!LT->parse(DebugLineData, RelocMap, &Offset))
+ if (!LT->parse(DebugLineData, &Offset))
return nullptr;
}
return LT;
}
-bool DWARFDebugLine::LineTable::parse(DataExtractor DebugLineData,
- const RelocAddrMap *RMap,
+bool DWARFDebugLine::LineTable::parse(const DWARFDataExtractor &DebugLineData,
uint32_t *OffsetPtr) {
const uint32_t DebugLineOffset = *OffsetPtr;
@@ -443,8 +442,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor DebugLineData,
// relocatable address. All of the other statement program opcodes
// that affect the address register add a delta to it. This instruction
// stores a relocatable value into it instead.
- State.Row.Address = getRelocatedValue(
- DebugLineData, DebugLineData.getAddressSize(), OffsetPtr, RMap);
+ State.Row.Address = DebugLineData.getRelocatedAddress(OffsetPtr);
break;
case DW_LNE_define_file:
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index 2178bef65d1d..c240dd7406d9 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -40,9 +40,9 @@ void DWARFDebugLoc::dump(raw_ostream &OS) const {
}
}
-void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) {
+void DWARFDebugLoc::parse(const DWARFDataExtractor &data) {
uint32_t Offset = 0;
- while (data.isValidOffset(Offset+AddressSize-1)) {
+ while (data.isValidOffset(Offset+data.getAddressSize()-1)) {
Locations.resize(Locations.size() + 1);
LocationList &Loc = Locations.back();
Loc.Offset = Offset;
@@ -51,8 +51,8 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) {
while (true) {
      // The beginning and ending address offsets.
Entry E;
- E.Begin = getRelocatedValue(data, AddressSize, &Offset, &RelocMap);
- E.End = getRelocatedValue(data, AddressSize, &Offset, &RelocMap);
+ E.Begin = data.getRelocatedAddress(&Offset);
+ E.End = data.getRelocatedAddress(&Offset);
// The end of any given location list is marked by an end of list entry,
// which consists of a 0 for the beginning address offset and a 0 for the
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index 43201293fe60..0b6ae86fd94b 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -23,8 +23,8 @@ void DWARFDebugRangeList::clear() {
Entries.clear();
}
-bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr,
- const RelocAddrMap &Relocs) {
+bool DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
+ uint32_t *offset_ptr) {
clear();
if (!data.isValidOffset(*offset_ptr))
return false;
@@ -35,10 +35,9 @@ bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr,
while (true) {
RangeListEntry entry;
uint32_t prev_offset = *offset_ptr;
- entry.StartAddress = getRelocatedValue(data, AddressSize, offset_ptr,
- &Relocs, &entry.SectionIndex);
- entry.EndAddress =
- getRelocatedValue(data, AddressSize, offset_ptr, &Relocs);
+ entry.StartAddress =
+ data.getRelocatedAddress(offset_ptr, &entry.SectionIndex);
+ entry.EndAddress = data.getRelocatedAddress(offset_ptr);
// Check that both values were extracted correctly.
if (*offset_ptr != prev_offset + 2 * AddressSize) {
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index b4b682dd11b5..ef416f72ad17 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -308,7 +308,7 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, unsigned Indent,
DIDumpOptions DumpOpts) const {
if (!isValid())
return;
- DataExtractor debug_info_data = U->getDebugInfoExtractor();
+ DWARFDataExtractor debug_info_data = U->getDebugInfoExtractor();
const uint32_t Offset = getOffset();
uint32_t offset = Offset;
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 861114bde1f2..83a7792e1244 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -275,7 +275,7 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
FC == FC_SectionOffset;
}
-bool DWARFFormValue::extractValue(const DataExtractor &Data,
+bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
uint32_t *OffsetPtr, const DWARFUnit *CU) {
U = CU;
bool Indirect = false;
@@ -290,10 +290,9 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
case DW_FORM_ref_addr: {
if (!U)
return false;
- uint16_t AddrSize = (Form == DW_FORM_addr) ? U->getAddressByteSize()
- : U->getRefAddrByteSize();
- Value.uval = getRelocatedValue(Data, AddrSize, OffsetPtr,
- U->getRelocMap(), &Value.SectionIndex);
+ uint16_t Size = (Form == DW_FORM_addr) ? U->getAddressByteSize()
+ : U->getRefAddrByteSize();
+ Value.uval = Data.getRelocatedValue(Size, OffsetPtr, &Value.SectionIndex);
break;
}
case DW_FORM_exprloc:
@@ -333,11 +332,9 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
case DW_FORM_ref4:
case DW_FORM_ref_sup4:
case DW_FORM_strx4:
- case DW_FORM_addrx4: {
- const RelocAddrMap *RelocMap = U ? U->getRelocMap() : nullptr;
- Value.uval = getRelocatedValue(Data, 4, OffsetPtr, RelocMap);
+ case DW_FORM_addrx4:
+ Value.uval = Data.getRelocatedValue(4, OffsetPtr);
break;
- }
case DW_FORM_data8:
case DW_FORM_ref8:
case DW_FORM_ref_sup8:
@@ -365,8 +362,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
case DW_FORM_strp_sup: {
if (!U)
return false;
- Value.uval = getRelocatedValue(Data, U->getDwarfOffsetByteSize(),
- OffsetPtr, U->getRelocMap());
+ Value.uval =
+ Data.getRelocatedValue(U->getDwarfOffsetByteSize(), OffsetPtr);
break;
}
case DW_FORM_flag_present:
@@ -576,7 +573,6 @@ Optional<const char *> DWARFFormValue::getAsCString() const {
uint64_t StrOffset;
if (!U->getStringOffsetSectionItem(Offset, StrOffset))
return None;
- StrOffset += U->getStringOffsetSectionRelocation(Offset);
Offset = StrOffset;
}
if (const char *Str = U->getStringExtractor().getCStr(&Offset)) {
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index fd9c7c2b1d46..043bdb874f43 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -32,8 +32,7 @@ using namespace dwarf;
void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(),
C.getStringSection(), C.getStringOffsetSection(),
- &C.getAddrSection(), C.getLineSection().Data, C.isLittleEndian(),
- false);
+ &C.getAddrSection(), C.getLineSection(), C.isLittleEndian(), false);
}
void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
@@ -41,15 +40,15 @@ void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
DWARFUnitIndex *Index) {
parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), &C.getRangeDWOSection(),
C.getStringDWOSection(), C.getStringOffsetDWOSection(),
- &C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(),
+ &C.getAddrSection(), C.getLineDWOSection(), C.isLittleEndian(),
true);
}
DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, const DWARFSection *RS,
StringRef SS, const DWARFSection &SOS,
- const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO,
- const DWARFUnitSectionBase &UnitSection,
+ const DWARFSection *AOS, const DWARFSection &LS, bool LE,
+ bool IsDWO, const DWARFUnitSectionBase &UnitSection,
const DWARFUnitIndex::Entry *IndexEntry)
: Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
LineSection(LS), StringSection(SS), StringOffsetSection(SOS),
@@ -65,33 +64,23 @@ bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize();
if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize())
return false;
- DataExtractor DA(AddrOffsetSection->Data, isLittleEndian,
- getAddressByteSize());
- Result = getRelocatedValue(DA, getAddressByteSize(), &Offset,
- &AddrOffsetSection->Relocs);
+ DWARFDataExtractor DA(*AddrOffsetSection, isLittleEndian,
+ getAddressByteSize());
+ Result = DA.getRelocatedAddress(&Offset);
return true;
}
bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
uint64_t &Result) const {
- unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4;
+ unsigned ItemSize = getDwarfOffsetByteSize();
uint32_t Offset = StringOffsetSectionBase + Index * ItemSize;
if (StringOffsetSection.Data.size() < Offset + ItemSize)
return false;
- DataExtractor DA(StringOffsetSection.Data, isLittleEndian, 0);
- Result = ItemSize == 4 ? DA.getU32(&Offset) : DA.getU64(&Offset);
+ DWARFDataExtractor DA(StringOffsetSection, isLittleEndian, 0);
+ Result = DA.getRelocatedValue(ItemSize, &Offset);
return true;
}
-uint64_t DWARFUnit::getStringOffsetSectionRelocation(uint32_t Index) const {
- unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4;
- uint64_t ByteOffset = StringOffsetSectionBase + Index * ItemSize;
- RelocAddrMap::const_iterator AI = getStringOffsetsRelocMap().find(ByteOffset);
- if (AI != getStringOffsetsRelocMap().end())
- return AI->second.Value;
- return 0;
-}
-
bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
Length = debug_info.getU32(offset_ptr);
// FIXME: Support DWARF64.
@@ -149,14 +138,13 @@ bool DWARFUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
}
bool DWARFUnit::extractRangeList(uint32_t RangeListOffset,
- DWARFDebugRangeList &RangeList) const {
+ DWARFDebugRangeList &RangeList) const {
// Require that compile unit is extracted.
assert(!DieArray.empty());
- DataExtractor RangesData(RangeSection->Data, isLittleEndian,
- getAddressByteSize());
+ DWARFDataExtractor RangesData(*RangeSection, isLittleEndian,
+ getAddressByteSize());
uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset;
- return RangeList.extract(RangesData, &ActualRangeListOffset,
- RangeSection->Relocs);
+ return RangeList.extract(RangesData, &ActualRangeListOffset);
}
void DWARFUnit::clear() {
@@ -190,7 +178,7 @@ void DWARFUnit::extractDIEsToVector(
uint32_t DIEOffset = Offset + getHeaderSize();
uint32_t NextCUOffset = getNextUnitOffset();
DWARFDebugInfoEntry DIE;
- DataExtractor DebugInfoData = getDebugInfoExtractor();
+ DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
uint32_t Depth = 0;
bool IsCUDie = true;
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 41907e570563..0a10e6b78911 100644
--- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -280,11 +280,10 @@ bool DWARFVerifier::handleDebugLine() {
bool DWARFVerifier::handleAppleNames() {
NumAppleNamesErrors = 0;
- DataExtractor AppleNamesSection(DCtx.getAppleNamesSection().Data,
- DCtx.isLittleEndian(), 0);
+ DWARFDataExtractor AppleNamesSection(DCtx.getAppleNamesSection(),
+ DCtx.isLittleEndian(), 0);
DataExtractor StrData(DCtx.getStringSection(), DCtx.isLittleEndian(), 0);
- DWARFAcceleratorTable AppleNames(AppleNamesSection, StrData,
- DCtx.getAppleNamesSection().Relocs);
+ DWARFAcceleratorTable AppleNames(AppleNamesSection, StrData);
if (!AppleNames.extract()) {
return true;
@@ -292,20 +291,80 @@ bool DWARFVerifier::handleAppleNames() {
OS << "Verifying .apple_names...\n";
- // Verify that all buckets have a valid hash index or are empty
+ // Verify that all buckets have a valid hash index or are empty.
uint32_t NumBuckets = AppleNames.getNumBuckets();
uint32_t NumHashes = AppleNames.getNumHashes();
uint32_t BucketsOffset =
AppleNames.getSizeHdr() + AppleNames.getHeaderDataLength();
+ uint32_t HashesBase = BucketsOffset + NumBuckets * 4;
+ uint32_t OffsetsBase = HashesBase + NumHashes * 4;
for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) {
uint32_t HashIdx = AppleNamesSection.getU32(&BucketsOffset);
if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) {
- OS << format("error: Bucket[%d] has invalid hash index: [%d]\n",
- BucketIdx, HashIdx);
+ OS << format("error: Bucket[%d] has invalid hash index: %u\n", BucketIdx,
+ HashIdx);
++NumAppleNamesErrors;
}
}
+
+ uint32_t NumAtoms = AppleNames.getAtomsDesc().size();
+ if (NumAtoms == 0) {
+ OS << "error: no atoms; failed to read HashData\n";
+ ++NumAppleNamesErrors;
+ return false;
+ }
+
+ if (!AppleNames.validateForms()) {
+ OS << "error: unsupported form; failed to read HashData\n";
+ ++NumAppleNamesErrors;
+ return false;
+ }
+
+ for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) {
+ uint32_t HashOffset = HashesBase + 4 * HashIdx;
+ uint32_t DataOffset = OffsetsBase + 4 * HashIdx;
+ uint32_t Hash = AppleNamesSection.getU32(&HashOffset);
+ uint32_t HashDataOffset = AppleNamesSection.getU32(&DataOffset);
+ if (!AppleNamesSection.isValidOffsetForDataOfSize(HashDataOffset,
+ sizeof(uint64_t))) {
+ OS << format("error: Hash[%d] has invalid HashData offset: 0x%08x\n",
+ HashIdx, HashDataOffset);
+ ++NumAppleNamesErrors;
+ }
+
+ uint32_t StrpOffset;
+ uint32_t StringOffset;
+ uint32_t StringCount = 0;
+ uint32_t DieOffset = dwarf::DW_INVALID_OFFSET;
+
+ while ((StrpOffset = AppleNamesSection.getU32(&HashDataOffset)) != 0) {
+ const uint32_t NumHashDataObjects =
+ AppleNamesSection.getU32(&HashDataOffset);
+ for (uint32_t HashDataIdx = 0; HashDataIdx < NumHashDataObjects;
+ ++HashDataIdx) {
+ DieOffset = AppleNames.readAtoms(HashDataOffset);
+ if (!DCtx.getDIEForOffset(DieOffset)) {
+ const uint32_t BucketIdx =
+ NumBuckets ? (Hash % NumBuckets) : UINT32_MAX;
+ StringOffset = StrpOffset;
+ const char *Name = StrData.getCStr(&StringOffset);
+ if (!Name)
+ Name = "<NULL>";
+
+ OS << format(
+ "error: .apple_names Bucket[%d] Hash[%d] = 0x%08x "
+ "Str[%u] = 0x%08x "
+ "DIE[%d] = 0x%08x is not a valid DIE offset for \"%s\".\n",
+ BucketIdx, HashIdx, Hash, StringCount, StrpOffset, HashDataIdx,
+ DieOffset, Name);
+
+ ++NumAppleNamesErrors;
+ }
+ }
+ ++StringCount;
+ }
+ }
return NumAppleNamesErrors == 0;
}
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleList.cpp b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
index 434f775097e0..eea70b229c67 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
@@ -1,4 +1,4 @@
-//===- DbiModuleList.cpp - PDB module information list ----------*- C++ -*-===//
+//===- DbiModuleList.cpp - PDB module information list --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,17 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/Native/Hash.cpp b/lib/DebugInfo/PDB/Native/Hash.cpp
index 2ad3f55dc5c3..61188ece2dcb 100644
--- a/lib/DebugInfo/PDB/Native/Hash.cpp
+++ b/lib/DebugInfo/PDB/Native/Hash.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/Hash.h"
-
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/JamCRC.h"
+#include <cstdint>
using namespace llvm;
using namespace llvm::support;
diff --git a/lib/DebugInfo/PDB/Native/HashTable.cpp b/lib/DebugInfo/PDB/Native/HashTable.cpp
index ebf8c9c04db1..439217f91d04 100644
--- a/lib/DebugInfo/PDB/Native/HashTable.cpp
+++ b/lib/DebugInfo/PDB/Native/HashTable.cpp
@@ -1,4 +1,4 @@
-//===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===//
+//===- HashTable.cpp - PDB Hash Table -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,12 +8,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
-
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
-
-#include <assert.h>
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
using namespace llvm;
using namespace llvm::pdb;
@@ -106,9 +110,11 @@ void HashTable::clear() {
}
uint32_t HashTable::capacity() const { return Buckets.size(); }
+
uint32_t HashTable::size() const { return Present.count(); }
HashTableIterator HashTable::begin() const { return HashTableIterator(*this); }
+
HashTableIterator HashTable::end() const {
return HashTableIterator(*this, 0, true);
}
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index 83c56574a16e..2e1f61c7a25d 100644
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -9,11 +9,11 @@
#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
@@ -97,7 +97,7 @@ ModuleDebugStreamRef::symbols(bool *HadError) const {
return make_range(SymbolArray.begin(HadError), SymbolArray.end());
}
-llvm::iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
+iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
ModuleDebugStreamRef::subsections() const {
return make_range(Subsections.begin(), Subsections.end());
}
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp
+++ /dev/null
diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index 4f90cd9cd8ac..354b8c0e07ff 100644
--- a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -1,4 +1,4 @@
-//===- NamedStreamMap.cpp - PDB Named Stream Map ----------------*- C++ -*-===//
+//===- NamedStreamMap.cpp - PDB Named Stream Map --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,17 +8,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
-
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <algorithm>
+#include <cassert>
#include <cstdint>
+#include <tuple>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
index c23120041164..a65782e2d4fc 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -32,9 +32,7 @@ std::unique_ptr<PDBSymbol>
NativeEnumModules::getChildAtIndex(uint32_t Index) const {
if (Index >= Modules.getModuleCount())
return nullptr;
- return std::unique_ptr<PDBSymbol>(new PDBSymbolCompiland(
- Session, std::unique_ptr<IPDBRawSymbol>(new NativeCompilandSymbol(
- Session, 0, Modules.getModuleDescriptor(Index)))));
+ return Session.createCompilandSymbol(Modules.getModuleDescriptor(Index));
}
std::unique_ptr<PDBSymbol> NativeEnumModules::getNext() {
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index ed6db63edbab..b4f5c96ce66b 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -1,4 +1,4 @@
-//===- NativeRawSymbol.cpp - Native implementation of IPDBRawSymbol -*- C++ -*-===//
+//===- NativeRawSymbol.cpp - Native implementation of IPDBRawSymbol -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,16 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
-#include "llvm/Support/ConvertUTF.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -49,7 +40,7 @@ NativeRawSymbol::findInlineFramesByRVA(uint32_t RVA) const {
return nullptr;
}
-void NativeRawSymbol::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) const {
+void NativeRawSymbol::getDataBytes(SmallVector<uint8_t, 32> &bytes) const {
bytes.clear();
}
@@ -109,7 +100,7 @@ uint32_t NativeRawSymbol::getClassParentId() const {
}
std::string NativeRawSymbol::getCompilerName() const {
- return 0;
+ return {};
}
uint32_t NativeRawSymbol::getCount() const {
@@ -136,7 +127,7 @@ uint32_t NativeRawSymbol::getLexicalParentId() const {
}
std::string NativeRawSymbol::getLibraryName() const {
- return "";
+ return {};
}
uint32_t NativeRawSymbol::getLiveRangeStartAddressOffset() const {
@@ -164,7 +155,7 @@ uint32_t NativeRawSymbol::getMemorySpaceKind() const {
}
std::string NativeRawSymbol::getName() const {
- return 0;
+ return {};
}
uint32_t NativeRawSymbol::getNumberOfAcceleratorPointerTags() const {
@@ -188,7 +179,7 @@ uint32_t NativeRawSymbol::getNumberOfRows() const {
}
std::string NativeRawSymbol::getObjectFileName() const {
- return "";
+ return {};
}
uint32_t NativeRawSymbol::getOemId() const {
@@ -240,7 +231,7 @@ uint32_t NativeRawSymbol::getSlot() const {
}
std::string NativeRawSymbol::getSourceFileName() const {
- return 0;
+ return {};
}
uint32_t NativeRawSymbol::getStride() const {
@@ -251,7 +242,7 @@ uint32_t NativeRawSymbol::getSubTypeId() const {
return 0;
}
-std::string NativeRawSymbol::getSymbolsFileName() const { return ""; }
+std::string NativeRawSymbol::getSymbolsFileName() const { return {}; }
uint32_t NativeRawSymbol::getSymIndexId() const { return SymbolId; }
@@ -292,7 +283,7 @@ uint32_t NativeRawSymbol::getUavSlot() const {
}
std::string NativeRawSymbol::getUndecoratedName() const {
- return 0;
+ return {};
}
uint32_t NativeRawSymbol::getUnmodifiedTypeId() const {
@@ -701,5 +692,5 @@ bool NativeRawSymbol::wasInlined() const {
}
std::string NativeRawSymbol::getUnused() const {
- return "";
+ return {};
}
diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 3ab381e76e62..93d43d9ef341 100644
--- a/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -13,6 +13,7 @@
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -23,8 +24,10 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
+
#include <algorithm>
#include <memory>
+#include <utility>
using namespace llvm;
using namespace llvm::msf;
@@ -66,12 +69,23 @@ Error NativeSession::createFromExe(StringRef Path,
return make_error<RawError>(raw_error_code::feature_unsupported);
}
+std::unique_ptr<PDBSymbolCompiland>
+NativeSession::createCompilandSymbol(DbiModuleDescriptor MI) {
+ const auto Id = static_cast<uint32_t>(SymbolCache.size());
+ SymbolCache.push_back(
+ llvm::make_unique<NativeCompilandSymbol>(*this, Id, MI));
+ return llvm::make_unique<PDBSymbolCompiland>(
+ *this, std::unique_ptr<IPDBRawSymbol>(SymbolCache[Id]->clone()));
+}
+
uint64_t NativeSession::getLoadAddress() const { return 0; }
void NativeSession::setLoadAddress(uint64_t Address) {}
std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() {
- auto RawSymbol = llvm::make_unique<NativeExeSymbol>(*this, 0);
+ const auto Id = static_cast<uint32_t>(SymbolCache.size());
+ SymbolCache.push_back(llvm::make_unique<NativeExeSymbol>(*this, Id));
+ auto RawSymbol = SymbolCache[Id]->clone();
auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
std::unique_ptr<PDBSymbolExe> ExeSymbol(
static_cast<PDBSymbolExe *>(PdbSymbol.release()));
@@ -80,7 +94,10 @@ std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() {
std::unique_ptr<PDBSymbol>
NativeSession::getSymbolById(uint32_t SymbolId) const {
- return nullptr;
+ // If the caller has a SymbolId, it'd better be in our SymbolCache.
+ return SymbolId < SymbolCache.size()
+ ? PDBSymbol::create(*this, SymbolCache[SymbolId]->clone())
+ : nullptr;
}
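Every raw symbol the native session creates now goes through SymbolCache, so the index id embedded in a PDBSymbol can be turned back into an equivalent symbol later via getSymbolById(). A minimal usage sketch; the helper name is invented and it assumes PDBSymbol::getSymIndexId() reports the id assigned here, so treat it as an illustration only:

#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include <memory>
using namespace llvm::pdb;

static std::unique_ptr<PDBSymbol> reloadGlobalScope(NativeSession &Session) {
  std::unique_ptr<PDBSymbolExe> Exe = Session.getGlobalScope();
  // getGlobalScope() has just pushed a NativeExeSymbol into SymbolCache, so
  // its index id round-trips: getSymbolById() wraps a clone of the cached raw
  // symbol, or returns nullptr for an id the session never handed out.
  return Session.getSymbolById(Exe->getSymIndexId());
}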
std::unique_ptr<PDBSymbol>
diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp
index 7e3acc1165f3..501d4f5985b7 100644
--- a/lib/DebugInfo/PDB/PDB.cpp
+++ b/lib/DebugInfo/PDB/PDB.cpp
@@ -1,4 +1,4 @@
-//===- PDB.cpp - base header file for creating a PDB reader -----*- C++ -*-===//
+//===- PDB.cpp - base header file for creating a PDB reader ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,18 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/PDB.h"
-
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
-#include "llvm/DebugInfo/PDB/IPDBSession.h"
-#include "llvm/DebugInfo/PDB/PDB.h"
#if LLVM_ENABLE_DIA_SDK
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
#endif
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Error.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -33,7 +29,7 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromPdb(Path, Session);
#else
- return llvm::make_error<GenericError>("DIA is not installed on the system");
+ return make_error<GenericError>("DIA is not installed on the system");
#endif
}
@@ -46,6 +42,6 @@ Error llvm::pdb::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromExe(Path, Session);
#else
- return llvm::make_error<GenericError>("DIA is not installed on the system");
+ return make_error<GenericError>("DIA is not installed on the system");
#endif
}
diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp
index dc22a30facab..faf1142ddf17 100644
--- a/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -1,4 +1,4 @@
-//===- PDBExtras.cpp - helper functions and classes for PDBs -----*- C++-*-===//
+//===- PDBExtras.cpp - helper functions and classes for PDBs --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/PDBExtras.h"
-
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/Formatters.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp
index da353cb6977c..5f4390bbaf12 100644
--- a/lib/DebugInfo/PDB/UDTLayout.cpp
+++ b/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -1,4 +1,4 @@
-//===- UDTLayout.cpp --------------------------------------------*- C++ -*-===//
+//===- UDTLayout.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,20 +8,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/UDTLayout.h"
-
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
-
-#include <utility>
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
using namespace llvm;
using namespace llvm::pdb;
@@ -176,7 +181,6 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
else
Bases.push_back(std::move(Base));
}
-
else if (auto Data = unique_dyn_cast<PDBSymbolData>(Child)) {
if (Data->getDataKind() == PDB_DataKind::Member)
Members.push_back(std::move(Data));
@@ -296,4 +300,4 @@ void UDTLayoutBase::addChildToLayout(std::unique_ptr<LayoutItemBase> Child) {
}
ChildStorage.push_back(std::move(Child));
-}
\ No newline at end of file
+}
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index b20690c7caaf..690276232a6f 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -193,11 +193,11 @@ public:
}
auto *MPtr = M.release();
ShouldDelete[MPtr] = true;
- auto Deleter =
- [this](Module *Mod) {
- if (ShouldDelete[Mod])
- delete Mod;
- };
+ auto Deleter = [this](Module *Mod) {
+ auto I = ShouldDelete.find(Mod);
+ if (I != ShouldDelete.end() && I->second)
+ delete Mod;
+ };
LocalModules.push_back(std::shared_ptr<Module>(MPtr, std::move(Deleter)));
LazyEmitLayer.addModule(LocalModules.back(), &MemMgr, &Resolver);
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 3d12eadea4dd..8b6f9bef66df 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -78,11 +78,11 @@ public:
void updateSymbolAddress(const SymbolRef &SymRef, uint64_t Addr);
// Methods for type inquiry through isa, cast and dyn_cast
- static inline bool classof(const Binary *v) {
+ static bool classof(const Binary *v) {
return (isa<ELFObjectFile<ELFT>>(v) &&
classof(cast<ELFObjectFile<ELFT>>(v)));
}
- static inline bool classof(const ELFObjectFile<ELFT> *v) {
+ static bool classof(const ELFObjectFile<ELFT> *v) {
return v->isDyldType();
}
};
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 27150a89d9b2..d387a6f0ecb9 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -716,7 +716,7 @@ bool ConstantFP::isExactlyValue(const APFloat &V) const {
/// Remove the constant from the constant table.
void ConstantFP::destroyConstantImpl() {
- llvm_unreachable("You can't ConstantInt->destroyConstantImpl()!");
+ llvm_unreachable("You can't ConstantFP->destroyConstantImpl()!");
}
//===----------------------------------------------------------------------===//
diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp
index 37e735251fdf..9bd0e297f4ef 100644
--- a/lib/IR/Dominators.cpp
+++ b/lib/IR/Dominators.cpp
@@ -63,15 +63,22 @@ bool BasicBlockEdge::isSingleEdge() const {
template class llvm::DomTreeNodeBase<BasicBlock>;
template class llvm::DominatorTreeBase<BasicBlock>;
-template void llvm::Calculate<Function, BasicBlock *>(
+template void llvm::DomTreeBuilder::Calculate<Function, BasicBlock *>(
DominatorTreeBase<
typename std::remove_pointer<GraphTraits<BasicBlock *>::NodeRef>::type>
&DT,
Function &F);
-template void llvm::Calculate<Function, Inverse<BasicBlock *>>(
+template void llvm::DomTreeBuilder::Calculate<Function, Inverse<BasicBlock *>>(
DominatorTreeBase<typename std::remove_pointer<
GraphTraits<Inverse<BasicBlock *>>::NodeRef>::type> &DT,
Function &F);
+template bool llvm::DomTreeBuilder::Verify<BasicBlock *>(
+ const DominatorTreeBase<
+ typename std::remove_pointer<GraphTraits<BasicBlock *>::NodeRef>::type>
+ &DT);
+template bool llvm::DomTreeBuilder::Verify<Inverse<BasicBlock *>>(
+ const DominatorTreeBase<typename std::remove_pointer<
+ GraphTraits<Inverse<BasicBlock *>>::NodeRef>::type> &DT);
bool DominatorTree::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
@@ -285,6 +292,13 @@ bool DominatorTree::isReachableFromEntry(const Use &U) const {
}
void DominatorTree::verifyDomTree() const {
+ // Perform the expensive checks only when VerifyDomInfo is set.
+ if (VerifyDomInfo && !verify()) {
+ errs() << "\n~~~~~~~~~~~\n\t\tDomTree verification failed!\n~~~~~~~~~~~\n";
+ print(errs());
+ abort();
+ }
+
Function &F = *getRoot()->getParent();
DominatorTree OtherDT;
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index ad0d4470c111..2e13f362344d 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -125,11 +125,18 @@ void LLVMContext::setDiagnosticHandler(DiagnosticHandlerTy DiagnosticHandler,
pImpl->RespectDiagnosticFilters = RespectFilters;
}
-void LLVMContext::setDiagnosticHotnessRequested(bool Requested) {
- pImpl->DiagnosticHotnessRequested = Requested;
+void LLVMContext::setDiagnosticsHotnessRequested(bool Requested) {
+ pImpl->DiagnosticsHotnessRequested = Requested;
}
-bool LLVMContext::getDiagnosticHotnessRequested() const {
- return pImpl->DiagnosticHotnessRequested;
+bool LLVMContext::getDiagnosticsHotnessRequested() const {
+ return pImpl->DiagnosticsHotnessRequested;
+}
+
+void LLVMContext::setDiagnosticsHotnessThreshold(uint64_t Threshold) {
+ pImpl->DiagnosticsHotnessThreshold = Threshold;
+}
+uint64_t LLVMContext::getDiagnosticsHotnessThreshold() const {
+ return pImpl->DiagnosticsHotnessThreshold;
}
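The rename to the plural "Diagnostics" spelling comes with a new knob: a hotness threshold stored next to the request flag. A minimal sketch of how a client might drive the pair; the helper name and the threshold value are invented, and the filtering itself happens in the remark emitters rather than in LLVMContext, so this is an illustration only:

#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static void enableHotRemarks(LLVMContext &Ctx) {
  // Ask optimization remarks to carry profile hotness information.
  Ctx.setDiagnosticsHotnessRequested(true);
  // Record a minimum hotness for remark consumers; 0 (the default) means no
  // threshold is applied.
  Ctx.setDiagnosticsHotnessThreshold(1000);
}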
yaml::Output *LLVMContext::getDiagnosticsOutputFile() {
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 4147f71ad9d2..395beb57fe37 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -1169,7 +1169,8 @@ public:
LLVMContext::DiagnosticHandlerTy DiagnosticHandler = nullptr;
void *DiagnosticContext = nullptr;
bool RespectDiagnosticFilters = false;
- bool DiagnosticHotnessRequested = false;
+ bool DiagnosticsHotnessRequested = false;
+ uint64_t DiagnosticsHotnessThreshold = 0;
std::unique_ptr<yaml::Output> DiagnosticsOutputFile;
LLVMContext::YieldCallbackTy YieldCallback = nullptr;
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 35032fdd33e1..68b8c9fcb939 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -472,6 +472,36 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
return Error::success();
}
+// Checks whether the given global value is in a non-prevailing comdat
+// (comdat containing values the linker indicated were not prevailing,
+// which we then dropped to available_externally), and if so, removes
+// it from the comdat. This is called for all global values to ensure the
+// comdat is empty rather than leaving an incomplete comdat. It is needed for
+// regular LTO modules in case we are doing a mixed-mode LTO compilation
+// (both regular and ThinLTO modules). Since the regular LTO module will be
+// linked first in the final native link, we want to make sure the linker
+// doesn't select any of these incomplete comdats that would be left
+// in the regular LTO module without this cleanup.
+static void
+handleNonPrevailingComdat(GlobalValue &GV,
+ std::set<const Comdat *> &NonPrevailingComdats) {
+ Comdat *C = GV.getComdat();
+ if (!C)
+ return;
+
+ if (!NonPrevailingComdats.count(C))
+ return;
+
+  // We additionally need to drop externally visible global values in the
+  // comdat to available_externally, so that we don't get multiply-defined
+  // linker errors.
+ if (!GV.hasLocalLinkage())
+ GV.setLinkage(GlobalValue::AvailableExternallyLinkage);
+
+ if (auto GO = dyn_cast<GlobalObject>(&GV))
+ GO->setComdat(nullptr);
+}
+
// Add a regular LTO object to the link.
// The resulting module needs to be linked into the combined LTO module with
// linkRegularLTO.
@@ -523,6 +553,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
};
Skip();
+ std::set<const Comdat *> NonPrevailingComdats;
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
@@ -557,6 +588,8 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
// module (in linkRegularLTO), based on whether it is undefined.
Mod.Keep.push_back(GV);
GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ if (GV->hasComdat())
+ NonPrevailingComdats.insert(GV->getComdat());
cast<GlobalObject>(GV)->setComdat(nullptr);
}
}
@@ -574,6 +607,9 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
// FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit.
}
+ if (!M.getComdatSymbolTable().empty())
+ for (GlobalValue &GV : M.global_values())
+ handleNonPrevailingComdat(GV, NonPrevailingComdats);
assert(MsymI == MsymE);
return std::move(Mod);
}
@@ -1087,7 +1123,7 @@ lto::setupOptimizationRemarks(LLVMContext &Context,
Context.setDiagnosticsOutputFile(
llvm::make_unique<yaml::Output>(DiagnosticFile->os()));
if (LTOPassRemarksWithHotness)
- Context.setDiagnosticHotnessRequested(true);
+ Context.setDiagnosticsHotnessRequested(true);
DiagnosticFile->keep();
return std::move(DiagnosticFile);
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 92c5da0e9fef..0318d916aa49 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -261,9 +261,9 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
Value -= Offset;
}
- // Let the backend adjust the fixup value if necessary, including whether
- // we need a relocation.
- Backend.processFixupValue(*this, Fixup, Target, IsResolved);
+ // Let the backend force a relocation if needed.
+ if (IsResolved && Backend.shouldForceRelocation(*this, Fixup, Target))
+ IsResolved = false;
return IsResolved;
}
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 45534ba18212..82352cb50c70 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -265,7 +265,8 @@ private:
uint32_t NumFuncImports);
void writeCodeRelocSection();
void writeDataRelocSection(uint64_t DataSectionHeaderSize);
- void writeLinkingMetaDataSection(ArrayRef<StringRef> WeakSymbols,
+ void writeLinkingMetaDataSection(uint32_t DataSize, uint32_t DataAlignment,
+ ArrayRef<StringRef> WeakSymbols,
bool HasStackPointer,
uint32_t StackPointerGlobal);
@@ -877,11 +878,8 @@ void WasmObjectWriter::writeDataRelocSection(uint64_t DataSectionHeaderSize) {
}
void WasmObjectWriter::writeLinkingMetaDataSection(
- ArrayRef<StringRef> WeakSymbols, bool HasStackPointer,
- uint32_t StackPointerGlobal) {
- if (!HasStackPointer && WeakSymbols.empty())
- return;
-
+ uint32_t DataSize, uint32_t DataAlignment, ArrayRef<StringRef> WeakSymbols,
+ bool HasStackPointer, uint32_t StackPointerGlobal) {
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_CUSTOM, "linking");
SectionBookkeeping SubSection;
@@ -902,6 +900,16 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
endSection(SubSection);
}
+ if (DataSize > 0) {
+ startSection(SubSection, wasm::WASM_DATA_SIZE);
+ encodeULEB128(DataSize, getStream());
+ endSection(SubSection);
+
+ startSection(SubSection, wasm::WASM_DATA_ALIGNMENT);
+ encodeULEB128(DataAlignment, getStream());
+ endSection(SubSection);
+ }
+
endSection(Section);
}
@@ -923,6 +931,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
unsigned NumFuncImports = 0;
unsigned NumGlobalImports = 0;
SmallVector<char, 0> DataBytes;
+ uint32_t DataAlignment = 1;
uint32_t StackPointerGlobal = 0;
bool HasStackPointer = false;
@@ -1157,6 +1166,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
report_fatal_error("data sections must contain at most one variable");
DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment()));
+ DataAlignment = std::max(DataAlignment, DataSection.getAlignment());
DataSection.setSectionOffset(DataBytes.size());
@@ -1272,7 +1282,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
writeNameSection(Functions, Imports, NumFuncImports);
writeCodeRelocSection();
writeDataRelocSection(DataSectionHeaderSize);
- writeLinkingMetaDataSection(WeakSymbols, HasStackPointer, StackPointerGlobal);
+ writeLinkingMetaDataSection(DataBytes.size(), DataAlignment, WeakSymbols, HasStackPointer, StackPointerGlobal);
// TODO: Translate the .comment section to the output.
// TODO: Translate debug sections to the output.
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 1d08a9efd8b3..fd5e7707c541 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -27,4 +27,5 @@ add_llvm_library(LLVMObject
DEPENDS
intrinsics_gen
+ llvm_vcsrevision_h
)
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 9a760d86e7e2..1e9b0c5b0454 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -883,7 +883,7 @@ base_reloc_iterator COFFObjectFile::base_reloc_end() const {
}
uint8_t COFFObjectFile::getBytesInAddress() const {
- return getArch() == Triple::x86_64 ? 8 : 4;
+ return getArch() == Triple::x86_64 || getArch() == Triple::aarch64 ? 8 : 4;
}
StringRef COFFObjectFile::getFileFormatName() const {
@@ -1216,6 +1216,29 @@ void COFFObjectFile::getRelocationTypeName(
Res = "Unknown";
}
break;
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ switch (Reloc->Type) {
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32NB);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH26);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEBASE_REL21);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_REL21);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12A);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12L);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12A);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_HIGH12A);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12L);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_TOKEN);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECTION);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR64);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14);
+ default:
+ Res = "Unknown";
+ }
+ break;
case COFF::IMAGE_FILE_MACHINE_I386:
switch (Reloc->Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE);
diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp
index 7bca032a7be1..7a6424a76a98 100644
--- a/lib/Object/IRSymtab.cpp
+++ b/lib/Object/IRSymtab.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <string>
@@ -43,6 +44,21 @@ using namespace irsymtab;
namespace {
+const char *getExpectedProducerName() {
+ static char DefaultName[] = LLVM_VERSION_STRING
+#ifdef LLVM_REVISION
+ " " LLVM_REVISION
+#endif
+ ;
+ // Allows for testing of the irsymtab writer and upgrade mechanism. This
+ // environment variable should not be set by users.
+ if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER"))
+ return OverrideName;
+ return DefaultName;
+}
+
+const char *kExpectedProducerName = getExpectedProducerName();
+
/// Stores the temporary state that is required to build an IR symbol table.
struct Builder {
SmallVector<char, 0> &Symtab;
@@ -231,6 +247,8 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
storage::Header Hdr;
assert(!IRMods.empty());
+ Hdr.Version = storage::Header::kCurrentVersion;
+ setStr(Hdr.Producer, kExpectedProducerName);
setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple());
setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
TT = Triple(IRMods[0]->getTargetTriple());
@@ -300,7 +318,31 @@ Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) {
return make_error<StringError>("Bitcode file does not contain any modules",
inconvertibleErrorCode());
- // Right now we have no on-disk representation of symbol tables, so we always
- // upgrade.
- return upgrade(BFC.Mods);
+ if (BFC.StrtabForSymtab.empty() ||
+ BFC.Symtab.size() < sizeof(storage::Header))
+ return upgrade(BFC.Mods);
+
+ // We cannot use the regular reader to read the version and producer, because
+ // it will expect the header to be in the current format. The only thing we
+ // can rely on is that the version and producer will be present as the first
+ // struct elements.
+ auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data());
+ unsigned Version = Hdr->Version;
+ StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab);
+ if (Version != storage::Header::kCurrentVersion ||
+ Producer != kExpectedProducerName)
+ return upgrade(BFC.Mods);
+
+ FileContents FC;
+ FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()},
+ {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}};
+
+ // Finally, make sure that the number of modules in the symbol table matches
+ // the number of modules in the bitcode file. If they differ, it may mean that
+ // the bitcode file was created by binary concatenation, so we need to create
+ // a new symbol table from scratch.
+ if (FC.TheReader.getNumModules() != BFC.Mods.size())
+ return upgrade(std::move(BFC.Mods));
+
+ return std::move(FC);
}
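[Editor's illustration, not part of the patch] The readBitcode() change above reuses an on-disk symbol table only when the format version, the producer string, and the module count all match; anything else falls back to upgrade(). A standalone sketch of that decision, using hypothetical stand-in types rather than the real irsymtab classes:

    #include <cstddef>
    #include <string>

    // Hypothetical stand-in for the on-disk header fields consulted above.
    struct HeaderSketch {
      unsigned Version;
      std::string Producer;
    };

    enum class LoadPath { ReuseCached, Rebuild };

    // Sketch of the "reuse only on an exact match" policy: a version bump, a
    // different producing compiler, or a module-count mismatch (e.g. bitcode
    // files concatenated after the fact) all force a rebuild from the IR.
    LoadPath chooseLoadPath(const HeaderSketch &Hdr, unsigned CurrentVersion,
                            const std::string &ExpectedProducer,
                            size_t ModulesInTable, size_t ModulesInBitcode) {
      if (Hdr.Version != CurrentVersion || Hdr.Producer != ExpectedProducer)
        return LoadPath::Rebuild;
      if (ModulesInTable != ModulesInBitcode)
        return LoadPath::Rebuild;
      return LoadPath::ReuseCached;
    }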
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index d15860674aeb..fff497ba5564 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -193,6 +193,9 @@ static Error readSection(WasmSection &Section, const uint8_t *&Ptr,
WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
: ObjectFile(Binary::ID_Wasm, Buffer) {
+ LinkingData.DataAlignment = 0;
+ LinkingData.DataSize = 0;
+
ErrorAsOutParameter ErrAsOutParam(&Err);
Header.Magic = getData().substr(0, 4);
if (Header.Magic != StringRef("\0asm", 4)) {
@@ -291,6 +294,7 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) {
Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr,
const uint8_t *End) {
+ HasLinkingSection = true;
while (Ptr < End) {
uint8_t Type = readVarint7(Ptr);
uint32_t Size = readVaruint32(Ptr);
@@ -305,7 +309,7 @@ Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr,
auto iter = SymbolMap.find(Symbol);
if (iter == SymbolMap.end()) {
return make_error<GenericBinaryError>(
- "Invalid symbol name in linking section",
+ "Invalid symbol name in linking section: " + Symbol,
object_error::parse_failed);
}
uint32_t SymIndex = iter->second;
@@ -318,6 +322,12 @@ Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr,
}
break;
}
+ case wasm::WASM_DATA_SIZE:
+ LinkingData.DataSize = readVaruint32(Ptr);
+ break;
+ case wasm::WASM_DATA_ALIGNMENT:
+ LinkingData.DataAlignment = readVaruint32(Ptr);
+ break;
case wasm::WASM_STACK_POINTER:
default:
Ptr += Size;
@@ -941,7 +951,9 @@ SubtargetFeatures WasmObjectFile::getFeatures() const {
return SubtargetFeatures();
}
-bool WasmObjectFile::isRelocatableObject() const { return false; }
+bool WasmObjectFile::isRelocatableObject() const {
+ return HasLinkingSection;
+}
const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const {
assert(Ref.d.a < Sections.size());
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index ff9b9ca35eb5..1371eacdf8f2 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -563,7 +563,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() {
Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize);
Symbol->Value = DataOffsets[i];
- Symbol->SectionNumber = 1;
+ Symbol->SectionNumber = 2;
Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 0;
diff --git a/lib/ObjectYAML/COFFYAML.cpp b/lib/ObjectYAML/COFFYAML.cpp
index c8cbea1490f6..1103159fc98d 100644
--- a/lib/ObjectYAML/COFFYAML.cpp
+++ b/lib/ObjectYAML/COFFYAML.cpp
@@ -12,17 +12,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/COFFYAML.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <cstring>
#define ECase(X) IO.enumCase(Value, #X, COFF::X);
+
namespace llvm {
namespace COFFYAML {
+
Section::Section() { memset(&Header, 0, sizeof(COFF::section)); }
Symbol::Symbol() { memset(&Header, 0, sizeof(COFF::symbol)); }
Object::Object() { memset(&Header, 0, sizeof(COFF::header)); }
-}
+
+} // end namespace COFFYAML
namespace yaml {
+
void ScalarEnumerationTraits<COFFYAML::COMDATType>::enumeration(
IO &IO, COFFYAML::COMDATType &Value) {
IO.enumCase(Value, "0", 0);
@@ -172,20 +180,20 @@ void ScalarEnumerationTraits<COFF::RelocationTypeAMD64>::enumeration(
void ScalarEnumerationTraits<COFF::WindowsSubsystem>::enumeration(
IO &IO, COFF::WindowsSubsystem &Value) {
- ECase(IMAGE_SUBSYSTEM_UNKNOWN);
- ECase(IMAGE_SUBSYSTEM_NATIVE);
- ECase(IMAGE_SUBSYSTEM_WINDOWS_GUI);
- ECase(IMAGE_SUBSYSTEM_WINDOWS_CUI);
- ECase(IMAGE_SUBSYSTEM_OS2_CUI);
- ECase(IMAGE_SUBSYSTEM_POSIX_CUI);
- ECase(IMAGE_SUBSYSTEM_NATIVE_WINDOWS);
- ECase(IMAGE_SUBSYSTEM_WINDOWS_CE_GUI);
- ECase(IMAGE_SUBSYSTEM_EFI_APPLICATION);
- ECase(IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER);
- ECase(IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER);
- ECase(IMAGE_SUBSYSTEM_EFI_ROM);
- ECase(IMAGE_SUBSYSTEM_XBOX);
- ECase(IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION);
+ ECase(IMAGE_SUBSYSTEM_UNKNOWN);
+ ECase(IMAGE_SUBSYSTEM_NATIVE);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_GUI);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_CUI);
+ ECase(IMAGE_SUBSYSTEM_OS2_CUI);
+ ECase(IMAGE_SUBSYSTEM_POSIX_CUI);
+ ECase(IMAGE_SUBSYSTEM_NATIVE_WINDOWS);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_CE_GUI);
+ ECase(IMAGE_SUBSYSTEM_EFI_APPLICATION);
+ ECase(IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER);
+ ECase(IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER);
+ ECase(IMAGE_SUBSYSTEM_EFI_ROM);
+ ECase(IMAGE_SUBSYSTEM_XBOX);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION);
}
#undef ECase
@@ -252,12 +260,15 @@ void ScalarBitSetTraits<COFF::DLLCharacteristics>::bitset(
#undef BCase
namespace {
+
struct NSectionSelectionType {
NSectionSelectionType(IO &)
: SelectionType(COFFYAML::COMDATType(0)) {}
NSectionSelectionType(IO &, uint8_t C)
: SelectionType(COFFYAML::COMDATType(C)) {}
+
uint8_t denormalize(IO &) { return SelectionType; }
+
COFFYAML::COMDATType SelectionType;
};
@@ -266,7 +277,9 @@ struct NWeakExternalCharacteristics {
: Characteristics(COFFYAML::WeakExternalCharacteristics(0)) {}
NWeakExternalCharacteristics(IO &, uint32_t C)
: Characteristics(COFFYAML::WeakExternalCharacteristics(C)) {}
+
uint32_t denormalize(IO &) { return Characteristics; }
+
COFFYAML::WeakExternalCharacteristics Characteristics;
};
@@ -275,7 +288,9 @@ struct NSectionCharacteristics {
: Characteristics(COFF::SectionCharacteristics(0)) {}
NSectionCharacteristics(IO &, uint32_t C)
: Characteristics(COFF::SectionCharacteristics(C)) {}
+
uint32_t denormalize(IO &) { return Characteristics; }
+
COFF::SectionCharacteristics Characteristics;
};
@@ -284,13 +299,16 @@ struct NAuxTokenType {
: AuxType(COFFYAML::AuxSymbolType(0)) {}
NAuxTokenType(IO &, uint8_t C)
: AuxType(COFFYAML::AuxSymbolType(C)) {}
+
uint32_t denormalize(IO &) { return AuxType; }
+
COFFYAML::AuxSymbolType AuxType;
};
struct NStorageClass {
NStorageClass(IO &) : StorageClass(COFF::SymbolStorageClass(0)) {}
NStorageClass(IO &, uint8_t S) : StorageClass(COFF::SymbolStorageClass(S)) {}
+
uint8_t denormalize(IO &) { return StorageClass; }
COFF::SymbolStorageClass StorageClass;
@@ -299,7 +317,9 @@ struct NStorageClass {
struct NMachine {
NMachine(IO &) : Machine(COFF::MachineTypes(0)) {}
NMachine(IO &, uint16_t M) : Machine(COFF::MachineTypes(M)) {}
+
uint16_t denormalize(IO &) { return Machine; }
+
COFF::MachineTypes Machine;
};
@@ -307,6 +327,7 @@ struct NHeaderCharacteristics {
NHeaderCharacteristics(IO &) : Characteristics(COFF::Characteristics(0)) {}
NHeaderCharacteristics(IO &, uint16_t C)
: Characteristics(COFF::Characteristics(C)) {}
+
uint16_t denormalize(IO &) { return Characteristics; }
COFF::Characteristics Characteristics;
@@ -316,13 +337,16 @@ template <typename RelocType>
struct NType {
NType(IO &) : Type(RelocType(0)) {}
NType(IO &, uint16_t T) : Type(RelocType(T)) {}
+
uint16_t denormalize(IO &) { return Type; }
+
RelocType Type;
};
struct NWindowsSubsystem {
NWindowsSubsystem(IO &) : Subsystem(COFF::WindowsSubsystem(0)) {}
NWindowsSubsystem(IO &, uint16_t C) : Subsystem(COFF::WindowsSubsystem(C)) {}
+
uint16_t denormalize(IO &) { return Subsystem; }
COFF::WindowsSubsystem Subsystem;
@@ -332,12 +356,13 @@ struct NDLLCharacteristics {
NDLLCharacteristics(IO &) : Characteristics(COFF::DLLCharacteristics(0)) {}
NDLLCharacteristics(IO &, uint16_t C)
: Characteristics(COFF::DLLCharacteristics(C)) {}
+
uint16_t denormalize(IO &) { return Characteristics; }
COFF::DLLCharacteristics Characteristics;
};
-}
+} // end anonymous namespace
void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
COFFYAML::Relocation &Rel) {
@@ -509,5 +534,6 @@ void MappingTraits<COFFYAML::Object>::mapping(IO &IO, COFFYAML::Object &Obj) {
IO.mapRequired("symbols", Obj.Symbols);
}
-}
-}
+} // end namespace yaml
+
+} // end namespace llvm
diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index d194420d5ef4..60b0ea28030a 100644
--- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -13,9 +13,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h"
-
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
@@ -24,15 +26,29 @@
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
-#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h"
-#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::CodeViewYAML;
@@ -48,9 +64,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeSite)
LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(CrossModuleExport)
LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLCrossModuleImport)
-LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLFrameData)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false)
LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind)
@@ -70,21 +84,25 @@ LLVM_YAML_DECLARE_MAPPING_TRAITS(InlineeSite)
namespace llvm {
namespace CodeViewYAML {
namespace detail {
+
struct YAMLSubsectionBase {
explicit YAMLSubsectionBase(DebugSubsectionKind Kind) : Kind(Kind) {}
- DebugSubsectionKind Kind;
- virtual ~YAMLSubsectionBase() {}
+ virtual ~YAMLSubsectionBase() = default;
virtual void map(IO &IO) = 0;
virtual std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
const codeview::StringsAndChecksums &SC) const = 0;
+
+ DebugSubsectionKind Kind;
};
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
namespace {
+
struct YAMLChecksumsSubsection : public YAMLSubsectionBase {
YAMLChecksumsSubsection()
: YAMLSubsectionBase(DebugSubsectionKind::FileChecksums) {}
@@ -215,7 +233,8 @@ struct YAMLCoffSymbolRVASubsection : public YAMLSubsectionBase {
std::vector<uint32_t> RVAs;
};
-}
+
+} // end anonymous namespace
void ScalarBitSetTraits<LineFlags>::bitset(IO &io, LineFlags &Flags) {
io.bitSetCase(Flags, "HasColumnInfo", LF_HaveColumns);
@@ -743,8 +762,9 @@ llvm::CodeViewYAML::toCodeViewSubsectionList(
}
namespace {
+
struct SubsectionConversionVisitor : public DebugSubsectionVisitor {
- SubsectionConversionVisitor() {}
+ SubsectionConversionVisitor() = default;
Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override;
Error visitLines(DebugLinesSubsectionRef &Lines,
@@ -769,6 +789,8 @@ struct SubsectionConversionVisitor : public DebugSubsectionVisitor {
YAMLDebugSubsection Subsection;
};
+} // end anonymous namespace
+
Error SubsectionConversionVisitor::visitUnknown(
DebugUnknownSubsectionRef &Unknown) {
return make_error<CodeViewError>(cv_error_code::operation_unsupported);
@@ -865,7 +887,6 @@ Error SubsectionConversionVisitor::visitCOFFSymbolRVAs(
Subsection.Subsection = *Result;
return Error::success();
}
-}
Expected<YAMLDebugSubsection>
YAMLDebugSubsection::fromCodeViewSubection(const StringsAndChecksumsRef &SC,
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 83f3d55b8e55..dbe4e2a6d6fd 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -13,13 +13,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
@@ -27,7 +39,6 @@ using namespace llvm::CodeViewYAML;
using namespace llvm::CodeViewYAML::detail;
using namespace llvm::yaml;
-LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex)
// We only need to declare these, the definitions are in CodeViewYAMLTypes.cpp
@@ -49,15 +60,16 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(RegisterId)
LLVM_YAML_DECLARE_ENUM_TRAITS(TrampolineType)
LLVM_YAML_DECLARE_ENUM_TRAITS(ThunkOrdinal)
-LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, TypeName)
+LLVM_YAML_STRONG_TYPEDEF(StringRef, TypeName)
LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeName, true)
StringRef ScalarTraits<TypeName>::input(StringRef S, void *V, TypeName &T) {
return ScalarTraits<StringRef>::input(S, V, T.value);
}
+
void ScalarTraits<TypeName>::output(const TypeName &T, void *V,
- llvm::raw_ostream &R) {
+ raw_ostream &R) {
ScalarTraits<StringRef>::output(T.value, V, R);
}
@@ -174,9 +186,10 @@ namespace detail {
struct SymbolRecordBase {
codeview::SymbolKind Kind;
+
explicit SymbolRecordBase(codeview::SymbolKind K) : Kind(K) {}
+ virtual ~SymbolRecordBase() = default;
- virtual ~SymbolRecordBase() {}
virtual void map(yaml::IO &io) = 0;
virtual codeview::CVSymbol
toCodeViewSymbol(BumpPtrAllocator &Allocator,
@@ -195,6 +208,7 @@ template <typename T> struct SymbolRecordImpl : public SymbolRecordBase {
CodeViewContainer Container) const override {
return SymbolSerializer::writeOneSymbol(Symbol, Allocator, Container);
}
+
Error fromCodeViewSymbol(codeview::CVSymbol CVS) override {
return SymbolDeserializer::deserializeAs<T>(CVS, Symbol);
}
@@ -218,6 +232,7 @@ struct UnknownSymbolRecord : public SymbolRecordBase {
::memcpy(Buffer + sizeof(RecordPrefix), Data.data(), Data.size());
return CVSymbol(Kind, ArrayRef<uint8_t>(Buffer, TotalLen));
}
+
Error fromCodeViewSymbol(CVSymbol CVS) override {
this->Kind = CVS.kind();
Data = CVS.RecordData.drop_front(sizeof(RecordPrefix));
@@ -497,9 +512,10 @@ template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) {
IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("DisplayName", Symbol.Name);
}
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol(
BumpPtrAllocator &Allocator, CodeViewContainer Container) const {
@@ -508,11 +524,13 @@ CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol(
namespace llvm {
namespace yaml {
+
template <> struct MappingTraits<SymbolRecordBase> {
static void mapping(IO &io, SymbolRecordBase &Record) { Record.map(io); }
};
-}
-}
+
+} // end namespace yaml
+} // end namespace llvm
template <typename SymbolType>
static inline Expected<CodeViewYAML::SymbolRecord>
diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index 2d1cb4b1b27b..0b2ea61c5fe0 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -13,14 +13,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/CodeViewYAMLTypes.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
@@ -29,7 +44,6 @@ using namespace llvm::CodeViewYAML::detail;
using namespace llvm::yaml;
LLVM_YAML_IS_SEQUENCE_VECTOR(OneMethodRecord)
-LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
LLVM_YAML_IS_SEQUENCE_VECTOR(VFTableSlotKind)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex)
@@ -63,9 +77,10 @@ namespace detail {
struct LeafRecordBase {
TypeLeafKind Kind;
+
explicit LeafRecordBase(TypeLeafKind K) : Kind(K) {}
+ virtual ~LeafRecordBase() = default;
- virtual ~LeafRecordBase() {}
virtual void map(yaml::IO &io) = 0;
virtual CVType toCodeViewRecord(TypeTableBuilder &TTB) const = 0;
virtual Error fromCodeViewRecord(CVType Type) = 0;
@@ -101,9 +116,10 @@ template <> struct LeafRecordImpl<FieldListRecord> : public LeafRecordBase {
struct MemberRecordBase {
TypeLeafKind Kind;
+
explicit MemberRecordBase(TypeLeafKind K) : Kind(K) {}
+ virtual ~MemberRecordBase() = default;
- virtual ~MemberRecordBase() {}
virtual void map(yaml::IO &io) = 0;
virtual void writeTo(FieldListRecordBuilder &FLRB) = 0;
};
@@ -111,6 +127,7 @@ struct MemberRecordBase {
template <typename T> struct MemberRecordImpl : public MemberRecordBase {
explicit MemberRecordImpl(TypeLeafKind K)
: MemberRecordBase(K), Record(static_cast<TypeRecordKind>(K)) {}
+
void map(yaml::IO &io) override;
void writeTo(FieldListRecordBuilder &FLRB) override {
@@ -119,12 +136,13 @@ template <typename T> struct MemberRecordImpl : public MemberRecordBase {
mutable T Record;
};
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
void ScalarTraits<TypeIndex>::output(const TypeIndex &S, void *,
- llvm::raw_ostream &OS) {
+ raw_ostream &OS) {
OS << S.getIndex();
}
@@ -136,8 +154,7 @@ StringRef ScalarTraits<TypeIndex>::input(StringRef Scalar, void *Ctx,
return Result;
}
-void ScalarTraits<APSInt>::output(const APSInt &S, void *,
- llvm::raw_ostream &OS) {
+void ScalarTraits<APSInt>::output(const APSInt &S, void *, raw_ostream &OS) {
S.print(OS, S.isSigned());
}
@@ -346,6 +363,7 @@ void MappingTraits<MemberPointerInfo>::mapping(IO &IO, MemberPointerInfo &MPI) {
namespace llvm {
namespace CodeViewYAML {
namespace detail {
+
template <> void LeafRecordImpl<ModifierRecord>::map(IO &IO) {
IO.mapRequired("ModifiedType", Record.ModifiedType);
IO.mapRequired("Modifiers", Record.Modifiers);
@@ -404,11 +422,13 @@ template <> void LeafRecordImpl<ArrayRecord>::map(IO &IO) {
void LeafRecordImpl<FieldListRecord>::map(IO &IO) {
IO.mapRequired("FieldList", Members);
}
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
namespace {
+
class MemberRecordConversionVisitor : public TypeVisitorCallbacks {
public:
explicit MemberRecordConversionVisitor(std::vector<MemberRecord> &Records)
@@ -433,7 +453,8 @@ private:
std::vector<MemberRecord> &Records;
};
-}
+
+} // end anonymous namespace
Error LeafRecordImpl<FieldListRecord>::fromCodeViewRecord(CVType Type) {
MemberRecordConversionVisitor V(Members);
@@ -461,13 +482,13 @@ void MappingTraits<OneMethodRecord>::mapping(IO &io, OneMethodRecord &Record) {
namespace llvm {
namespace CodeViewYAML {
namespace detail {
+
template <> void LeafRecordImpl<ClassRecord>::map(IO &IO) {
IO.mapRequired("MemberCount", Record.MemberCount);
IO.mapRequired("Options", Record.Options);
IO.mapRequired("FieldList", Record.FieldList);
IO.mapRequired("Name", Record.Name);
IO.mapRequired("UniqueName", Record.UniqueName);
-
IO.mapRequired("DerivationList", Record.DerivationList);
IO.mapRequired("VTableShape", Record.VTableShape);
IO.mapRequired("Size", Record.Size);
@@ -479,7 +500,6 @@ template <> void LeafRecordImpl<UnionRecord>::map(IO &IO) {
IO.mapRequired("FieldList", Record.FieldList);
IO.mapRequired("Name", Record.Name);
IO.mapRequired("UniqueName", Record.UniqueName);
-
IO.mapRequired("Size", Record.Size);
}
@@ -489,7 +509,6 @@ template <> void LeafRecordImpl<EnumRecord>::map(IO &IO) {
IO.mapRequired("FieldList", Record.FieldList);
IO.mapRequired("Name", Record.Name);
IO.mapRequired("UniqueName", Record.UniqueName);
-
IO.mapRequired("UnderlyingType", Record.UnderlyingType);
}
@@ -603,9 +622,10 @@ template <> void MemberRecordImpl<VirtualBaseClassRecord>::map(IO &IO) {
template <> void MemberRecordImpl<ListContinuationRecord>::map(IO &IO) {
IO.mapRequired("ContinuationIndex", Record.ContinuationIndex);
}
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
template <typename T>
static inline Expected<LeafRecord> fromCodeViewRecordImpl(CVType Type) {
@@ -628,7 +648,8 @@ Expected<LeafRecord> LeafRecord::fromCodeViewRecord(CVType Type) {
#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName)
switch (Type.kind()) {
#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
- default: { llvm_unreachable("Unknown leaf kind!"); }
+ default:
+ llvm_unreachable("Unknown leaf kind!");
}
return make_error<CodeViewError>(cv_error_code::corrupt_record);
}
@@ -644,6 +665,7 @@ CVType LeafRecord::toCodeViewRecord(TypeTableBuilder &TTB) const {
namespace llvm {
namespace yaml {
+
template <> struct MappingTraits<LeafRecordBase> {
static void mapping(IO &io, LeafRecordBase &Record) { Record.map(io); }
};
@@ -651,8 +673,9 @@ template <> struct MappingTraits<LeafRecordBase> {
template <> struct MappingTraits<MemberRecordBase> {
static void mapping(IO &io, MemberRecordBase &Record) { Record.map(io); }
};
-}
-}
+
+} // end namespace yaml
+} // end namespace llvm
template <typename ConcreteType>
static void mapLeafRecordImpl(IO &IO, const char *Class, TypeLeafKind Kind,
diff --git a/lib/ObjectYAML/DWARFEmitter.cpp b/lib/ObjectYAML/DWARFEmitter.cpp
index 91c928771a65..89fc652035ca 100644
--- a/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/lib/ObjectYAML/DWARFEmitter.cpp
@@ -13,15 +13,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/DWARFEmitter.h"
+#include "DWARFVisitor.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
-
-#include "DWARFVisitor.h"
-
#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
using namespace llvm;
@@ -127,7 +137,7 @@ class DumpVisitor : public DWARFYAML::ConstVisitor {
raw_ostream &OS;
protected:
- virtual void onStartCompileUnit(const DWARFYAML::Unit &CU) {
+ void onStartCompileUnit(const DWARFYAML::Unit &CU) override {
writeInitialLength(CU.Length, OS, DebugInfo.IsLittleEndian);
writeInteger((uint16_t)CU.Version, OS, DebugInfo.IsLittleEndian);
if(CU.Version >= 5) {
@@ -141,41 +151,43 @@ protected:
}
- virtual void onStartDIE(const DWARFYAML::Unit &CU,
- const DWARFYAML::Entry &DIE) {
+ void onStartDIE(const DWARFYAML::Unit &CU,
+ const DWARFYAML::Entry &DIE) override {
encodeULEB128(DIE.AbbrCode, OS);
}
- virtual void onValue(const uint8_t U) {
+ void onValue(const uint8_t U) override {
writeInteger(U, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const uint16_t U) {
+ void onValue(const uint16_t U) override {
writeInteger(U, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const uint32_t U) {
+
+ void onValue(const uint32_t U) override {
writeInteger(U, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const uint64_t U, const bool LEB = false) {
+
+ void onValue(const uint64_t U, const bool LEB = false) override {
if (LEB)
encodeULEB128(U, OS);
else
writeInteger(U, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const int64_t S, const bool LEB = false) {
+ void onValue(const int64_t S, const bool LEB = false) override {
if (LEB)
encodeSLEB128(S, OS);
else
writeInteger(S, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const StringRef String) {
+ void onValue(const StringRef String) override {
OS.write(String.data(), String.size());
OS.write('\0');
}
- virtual void onValue(const MemoryBufferRef MBR) {
+ void onValue(const MemoryBufferRef MBR) override {
OS.write(MBR.getBufferStart(), MBR.getBufferSize());
}
@@ -280,7 +292,7 @@ void DWARFYAML::EmitDebugLine(raw_ostream &OS, const DWARFYAML::Data &DI) {
}
}
-typedef void (*EmitFuncType)(raw_ostream &, const DWARFYAML::Data &);
+using EmitFuncType = void (*)(raw_ostream &, const DWARFYAML::Data &);
static void
EmitDebugSectionImpl(const DWARFYAML::Data &DI, EmitFuncType EmitFunc,
diff --git a/lib/ObjectYAML/DWARFYAML.cpp b/lib/ObjectYAML/DWARFYAML.cpp
index edb9545f14b1..d6c09e1a35d7 100644
--- a/lib/ObjectYAML/DWARFYAML.cpp
+++ b/lib/ObjectYAML/DWARFYAML.cpp
@@ -171,6 +171,6 @@ void MappingTraits<DWARFYAML::InitialLength>::mapping(
IO.mapRequired("TotalLength64", InitialLength.TotalLength64);
}
-} // namespace llvm::yaml
+} // end namespace yaml
-} // namespace llvm
+} // end namespace llvm
diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp
index dbd5498e003d..39741dab327a 100644
--- a/lib/ObjectYAML/ELFYAML.cpp
+++ b/lib/ObjectYAML/ELFYAML.cpp
@@ -12,12 +12,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/ELFYAML.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MipsABIFlags.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cassert>
+#include <cstdint>
namespace llvm {
-ELFYAML::Section::~Section() {}
+ELFYAML::Section::~Section() = default;
namespace yaml {
@@ -542,6 +548,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
llvm_unreachable("Unsupported architecture");
}
#undef ELF_RELOC
+ IO.enumFallback<Hex32>(Value);
}
void ScalarEnumerationTraits<ELFYAML::MIPS_AFL_REG>::enumeration(
@@ -643,6 +650,7 @@ void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
}
namespace {
+
struct NormalizedOther {
NormalizedOther(IO &)
: Visibility(ELFYAML::ELF_STV(0)), Other(ELFYAML::ELF_STO(0)) {}
@@ -654,7 +662,8 @@ struct NormalizedOther {
ELFYAML::ELF_STV Visibility;
ELFYAML::ELF_STO Other;
};
-}
+
+} // end anonymous namespace
void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
IO.mapOptional("Name", Symbol.Name, StringRef());
@@ -777,6 +786,7 @@ StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
}
namespace {
+
struct NormalizedMips64RelType {
NormalizedMips64RelType(IO &)
: Type(ELFYAML::ELF_REL(ELF::R_MIPS_NONE)),
@@ -797,7 +807,8 @@ struct NormalizedMips64RelType {
ELFYAML::ELF_REL Type3;
ELFYAML::ELF_RSS SpecSym;
};
-}
+
+} // end anonymous namespace
void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
ELFYAML::Relocation &Rel) {
@@ -838,4 +849,5 @@ LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_ASE)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_FLAGS1)
} // end namespace yaml
+
} // end namespace llvm
diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp
index 461684827872..ab452a7bf6ef 100644
--- a/lib/ObjectYAML/MachOYAML.cpp
+++ b/lib/ObjectYAML/MachOYAML.cpp
@@ -12,16 +12,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/MachOYAML.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
-
-#include <string.h> // For memcpy, memset and strnlen.
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cinttypes>
+#include <cstdint>
+#include <cstring>
namespace llvm {
-MachOYAML::LoadCommand::~LoadCommand() {}
+MachOYAML::LoadCommand::~LoadCommand() = default;
bool MachOYAML::LinkEditData::isEmpty() const {
return 0 ==
@@ -33,7 +36,7 @@ bool MachOYAML::LinkEditData::isEmpty() const {
namespace yaml {
void ScalarTraits<char_16>::output(const char_16 &Val, void *,
- llvm::raw_ostream &Out) {
+ raw_ostream &Out) {
auto Len = strnlen(&Val[0], 16);
Out << StringRef(&Val[0], Len);
}
@@ -51,8 +54,7 @@ StringRef ScalarTraits<char_16>::input(StringRef Scalar, void *, char_16 &Val) {
bool ScalarTraits<char_16>::mustQuote(StringRef S) { return needsQuotes(S); }
-void ScalarTraits<uuid_t>::output(const uuid_t &Val, void *,
- llvm::raw_ostream &Out) {
+void ScalarTraits<uuid_t>::output(const uuid_t &Val, void *, raw_ostream &Out) {
for (int Idx = 0; Idx < 16; ++Idx) {
Out << format("%02" PRIX32, Val[Idx]);
if (Idx == 3 || Idx == 5 || Idx == 7 || Idx == 9)
@@ -154,7 +156,7 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping(
IO.mapOptional("BindOpcodes", LinkEditData.BindOpcodes);
IO.mapOptional("WeakBindOpcodes", LinkEditData.WeakBindOpcodes);
IO.mapOptional("LazyBindOpcodes", LinkEditData.LazyBindOpcodes);
- if(LinkEditData.ExportTrie.Children.size() > 0 || !IO.outputting())
+ if (!LinkEditData.ExportTrie.Children.empty() || !IO.outputting())
IO.mapOptional("ExportTrie", LinkEditData.ExportTrie);
IO.mapOptional("NameList", LinkEditData.NameList);
IO.mapOptional("StringTable", LinkEditData.StringTable);
@@ -308,13 +310,11 @@ void MappingTraits<MachO::dylib_command>::mapping(
void MappingTraits<MachO::dylinker_command>::mapping(
IO &IO, MachO::dylinker_command &LoadCommand) {
-
IO.mapRequired("name", LoadCommand.name);
}
void MappingTraits<MachO::dysymtab_command>::mapping(
IO &IO, MachO::dysymtab_command &LoadCommand) {
-
IO.mapRequired("ilocalsym", LoadCommand.ilocalsym);
IO.mapRequired("nlocalsym", LoadCommand.nlocalsym);
IO.mapRequired("iextdefsym", LoadCommand.iextdefsym);
@@ -337,7 +337,6 @@ void MappingTraits<MachO::dysymtab_command>::mapping(
void MappingTraits<MachO::encryption_info_command>::mapping(
IO &IO, MachO::encryption_info_command &LoadCommand) {
-
IO.mapRequired("cryptoff", LoadCommand.cryptoff);
IO.mapRequired("cryptsize", LoadCommand.cryptsize);
IO.mapRequired("cryptid", LoadCommand.cryptid);
@@ -345,7 +344,6 @@ void MappingTraits<MachO::encryption_info_command>::mapping(
void MappingTraits<MachO::encryption_info_command_64>::mapping(
IO &IO, MachO::encryption_info_command_64 &LoadCommand) {
-
IO.mapRequired("cryptoff", LoadCommand.cryptoff);
IO.mapRequired("cryptsize", LoadCommand.cryptsize);
IO.mapRequired("cryptid", LoadCommand.cryptid);
@@ -354,14 +352,12 @@ void MappingTraits<MachO::encryption_info_command_64>::mapping(
void MappingTraits<MachO::entry_point_command>::mapping(
IO &IO, MachO::entry_point_command &LoadCommand) {
-
IO.mapRequired("entryoff", LoadCommand.entryoff);
IO.mapRequired("stacksize", LoadCommand.stacksize);
}
void MappingTraits<MachO::fvmfile_command>::mapping(
IO &IO, MachO::fvmfile_command &LoadCommand) {
-
IO.mapRequired("name", LoadCommand.name);
IO.mapRequired("header_addr", LoadCommand.header_addr);
}
@@ -374,7 +370,6 @@ void MappingTraits<MachO::fvmlib>::mapping(IO &IO, MachO::fvmlib &FVMLib) {
void MappingTraits<MachO::fvmlib_command>::mapping(
IO &IO, MachO::fvmlib_command &LoadCommand) {
-
IO.mapRequired("fvmlib", LoadCommand.fvmlib);
}
@@ -383,20 +378,17 @@ void MappingTraits<MachO::ident_command>::mapping(
void MappingTraits<MachO::linkedit_data_command>::mapping(
IO &IO, MachO::linkedit_data_command &LoadCommand) {
-
IO.mapRequired("dataoff", LoadCommand.dataoff);
IO.mapRequired("datasize", LoadCommand.datasize);
}
void MappingTraits<MachO::linker_option_command>::mapping(
IO &IO, MachO::linker_option_command &LoadCommand) {
-
IO.mapRequired("count", LoadCommand.count);
}
void MappingTraits<MachO::prebind_cksum_command>::mapping(
IO &IO, MachO::prebind_cksum_command &LoadCommand) {
-
IO.mapRequired("cksum", LoadCommand.cksum);
}
@@ -405,7 +397,6 @@ void MappingTraits<MachO::load_command>::mapping(
void MappingTraits<MachO::prebound_dylib_command>::mapping(
IO &IO, MachO::prebound_dylib_command &LoadCommand) {
-
IO.mapRequired("name", LoadCommand.name);
IO.mapRequired("nmodules", LoadCommand.nmodules);
IO.mapRequired("linked_modules", LoadCommand.linked_modules);
@@ -413,7 +404,6 @@ void MappingTraits<MachO::prebound_dylib_command>::mapping(
void MappingTraits<MachO::routines_command>::mapping(
IO &IO, MachO::routines_command &LoadCommand) {
-
IO.mapRequired("init_address", LoadCommand.init_address);
IO.mapRequired("init_module", LoadCommand.init_module);
IO.mapRequired("reserved1", LoadCommand.reserved1);
@@ -426,7 +416,6 @@ void MappingTraits<MachO::routines_command>::mapping(
void MappingTraits<MachO::routines_command_64>::mapping(
IO &IO, MachO::routines_command_64 &LoadCommand) {
-
IO.mapRequired("init_address", LoadCommand.init_address);
IO.mapRequired("init_module", LoadCommand.init_module);
IO.mapRequired("reserved1", LoadCommand.reserved1);
@@ -439,7 +428,6 @@ void MappingTraits<MachO::routines_command_64>::mapping(
void MappingTraits<MachO::rpath_command>::mapping(
IO &IO, MachO::rpath_command &LoadCommand) {
-
IO.mapRequired("path", LoadCommand.path);
}
@@ -475,7 +463,6 @@ void MappingTraits<MachO::section_64>::mapping(IO &IO,
void MappingTraits<MachO::segment_command>::mapping(
IO &IO, MachO::segment_command &LoadCommand) {
-
IO.mapRequired("segname", LoadCommand.segname);
IO.mapRequired("vmaddr", LoadCommand.vmaddr);
IO.mapRequired("vmsize", LoadCommand.vmsize);
@@ -489,7 +476,6 @@ void MappingTraits<MachO::segment_command>::mapping(
void MappingTraits<MachO::segment_command_64>::mapping(
IO &IO, MachO::segment_command_64 &LoadCommand) {
-
IO.mapRequired("segname", LoadCommand.segname);
IO.mapRequired("vmaddr", LoadCommand.vmaddr);
IO.mapRequired("vmsize", LoadCommand.vmsize);
@@ -503,44 +489,37 @@ void MappingTraits<MachO::segment_command_64>::mapping(
void MappingTraits<MachO::source_version_command>::mapping(
IO &IO, MachO::source_version_command &LoadCommand) {
-
IO.mapRequired("version", LoadCommand.version);
}
void MappingTraits<MachO::sub_client_command>::mapping(
IO &IO, MachO::sub_client_command &LoadCommand) {
-
IO.mapRequired("client", LoadCommand.client);
}
void MappingTraits<MachO::sub_framework_command>::mapping(
IO &IO, MachO::sub_framework_command &LoadCommand) {
-
IO.mapRequired("umbrella", LoadCommand.umbrella);
}
void MappingTraits<MachO::sub_library_command>::mapping(
IO &IO, MachO::sub_library_command &LoadCommand) {
-
IO.mapRequired("sub_library", LoadCommand.sub_library);
}
void MappingTraits<MachO::sub_umbrella_command>::mapping(
IO &IO, MachO::sub_umbrella_command &LoadCommand) {
-
IO.mapRequired("sub_umbrella", LoadCommand.sub_umbrella);
}
void MappingTraits<MachO::symseg_command>::mapping(
IO &IO, MachO::symseg_command &LoadCommand) {
-
IO.mapRequired("offset", LoadCommand.offset);
IO.mapRequired("size", LoadCommand.size);
}
void MappingTraits<MachO::symtab_command>::mapping(
IO &IO, MachO::symtab_command &LoadCommand) {
-
IO.mapRequired("symoff", LoadCommand.symoff);
IO.mapRequired("nsyms", LoadCommand.nsyms);
IO.mapRequired("stroff", LoadCommand.stroff);
@@ -552,27 +531,23 @@ void MappingTraits<MachO::thread_command>::mapping(
void MappingTraits<MachO::twolevel_hints_command>::mapping(
IO &IO, MachO::twolevel_hints_command &LoadCommand) {
-
IO.mapRequired("offset", LoadCommand.offset);
IO.mapRequired("nhints", LoadCommand.nhints);
}
void MappingTraits<MachO::uuid_command>::mapping(
IO &IO, MachO::uuid_command &LoadCommand) {
-
IO.mapRequired("uuid", LoadCommand.uuid);
}
void MappingTraits<MachO::version_min_command>::mapping(
IO &IO, MachO::version_min_command &LoadCommand) {
-
IO.mapRequired("version", LoadCommand.version);
IO.mapRequired("sdk", LoadCommand.sdk);
}
void MappingTraits<MachO::note_command>::mapping(
IO &IO, MachO::note_command &LoadCommand) {
-
IO.mapRequired("data_owner", LoadCommand.data_owner);
IO.mapRequired("offset", LoadCommand.offset);
IO.mapRequired("size", LoadCommand.size);
@@ -580,13 +555,12 @@ void MappingTraits<MachO::note_command>::mapping(
void MappingTraits<MachO::build_version_command>::mapping(
IO &IO, MachO::build_version_command &LoadCommand) {
-
IO.mapRequired("platform", LoadCommand.platform);
IO.mapRequired("minos", LoadCommand.minos);
IO.mapRequired("sdk", LoadCommand.sdk);
IO.mapRequired("ntools", LoadCommand.ntools);
}
-} // namespace llvm::yaml
+} // end namespace yaml
-} // namespace llvm
+} // end namespace llvm
diff --git a/lib/ObjectYAML/ObjectYAML.cpp b/lib/ObjectYAML/ObjectYAML.cpp
index 4b7154ebb7c1..850c1a5a06c0 100644
--- a/lib/ObjectYAML/ObjectYAML.cpp
+++ b/lib/ObjectYAML/ObjectYAML.cpp
@@ -12,7 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/ObjectYAML.h"
-#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <string>
using namespace llvm;
using namespace yaml;
@@ -53,8 +56,8 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
IO.setError("YAML Object File missing document type tag!");
else
IO.setError(
- llvm::Twine("YAML Object File unsupported document type tag '") +
- llvm::Twine(Tag) + llvm::Twine("'!"));
+ Twine("YAML Object File unsupported document type tag '") +
+ Twine(Tag) + Twine("'!"));
}
}
}
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 65703c6cf683..2040efdc9d11 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -12,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/WasmYAML.h"
-#include "llvm/Object/Wasm.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MipsABIFlags.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/YAMLTraits.h"
namespace llvm {
@@ -22,7 +23,7 @@ namespace WasmYAML {
// Declared here rather than in the header to comply with:
// http://llvm.org/docs/CodingStandards.html#provide-a-virtual-method-anchor-for-classes-in-headers
-Section::~Section() {}
+Section::~Section() = default;
} // end namespace WasmYAML
@@ -56,6 +57,8 @@ static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) {
static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) {
commonSectionMapping(IO, Section);
IO.mapRequired("Name", Section.Name);
+ IO.mapRequired("DataSize", Section.DataSize);
+ IO.mapRequired("DataAlignment", Section.DataAlignment);
IO.mapRequired("SymbolInfo", Section.SymbolInfos);
}
@@ -403,4 +406,5 @@ void ScalarEnumerationTraits<WasmYAML::RelocType>::enumeration(
}
} // end namespace yaml
+
} // end namespace llvm
diff --git a/lib/ObjectYAML/YAML.cpp b/lib/ObjectYAML/YAML.cpp
index 75cf1fbccc80..67b5764eadaa 100644
--- a/lib/ObjectYAML/YAML.cpp
+++ b/lib/ObjectYAML/YAML.cpp
@@ -16,11 +16,12 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
+#include <cstdint>
using namespace llvm;
void yaml::ScalarTraits<yaml::BinaryRef>::output(
- const yaml::BinaryRef &Val, void *, llvm::raw_ostream &Out) {
+ const yaml::BinaryRef &Val, void *, raw_ostream &Out) {
Val.writeAsHex(Out);
}
@@ -34,7 +35,7 @@ StringRef yaml::ScalarTraits<yaml::BinaryRef>::input(StringRef Scalar, void *,
if (!isxdigit(Scalar[I]))
return "BinaryRef hex string must contain only hex digits.";
Val = yaml::BinaryRef(Scalar);
- return StringRef();
+ return {};
}
void yaml::BinaryRef::writeAsBinary(raw_ostream &OS) const {
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 78d5ea955e64..0380bd991d71 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -161,8 +161,8 @@ static cl::opt<bool>
cl::desc("Run NewGVN instead of GVN"));
static cl::opt<bool> EnableEarlyCSEMemSSA(
- "enable-npm-earlycse-memssa", cl::init(false), cl::Hidden,
- cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = off)"));
+ "enable-npm-earlycse-memssa", cl::init(true), cl::Hidden,
+ cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = on)"));
static cl::opt<bool> EnableGVNHoist(
"enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
@@ -480,6 +480,14 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
MPM.addPass(PGOInstrumentationUse(ProfileUseFile));
}
+static InlineParams
+getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
+ auto O3 = PassBuilder::O3;
+ unsigned OptLevel = Level > O3 ? 2 : Level;
+ unsigned SizeLevel = Level > O3 ? Level - O3 : 0;
+ return getInlineParams(OptLevel, SizeLevel);
+}
+
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
bool DebugLogging) {
@@ -527,13 +535,17 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Add all the requested passes for PGO, if requested.
if (PGOOpt) {
- assert(PGOOpt->RunProfileGen || PGOOpt->SamplePGO ||
+ assert(PGOOpt->RunProfileGen || !PGOOpt->SampleProfileFile.empty() ||
!PGOOpt->ProfileUseFile.empty());
- addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen,
- PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile);
+ if (PGOOpt->SampleProfileFile.empty())
+ addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen,
+ PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile);
+ else
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile));
// Indirect call promotion that promotes intra-module targets only.
- MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO));
+ MPM.addPass(PGOIndirectCallPromotion(
+ false, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
}
// Require the GlobalsAA analysis for the module so we can query it within
@@ -558,8 +570,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Run the inliner first. The theory is that we are walking bottom-up and so
// the callees have already been fully optimized, and we want to inline them
// into the callers so that our optimizations can reflect that.
- // FIXME; Customize the threshold based on optimization level.
- MainCGPipeline.addPass(InlinerPass());
+ MainCGPipeline.addPass(InlinerPass(getInlineParamsFromOptLevel(Level)));
// Now deduce any function attributes based in the current code.
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
@@ -751,9 +762,6 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
// Reduce the size of the IR as much as possible.
MPM.addPass(GlobalOptPass());
- // Rename anon globals to be able to export them in the summary.
- MPM.addPass(NameAnonGlobalPass());
-
return MPM;
}
@@ -772,9 +780,9 @@ PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level,
// During the ThinLTO backend phase we perform early indirect call promotion
// here, before globalopt. Otherwise imported available_externally functions
// look unreferenced and are removed.
- MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */,
- PGOOpt && PGOOpt->SamplePGO &&
- !PGOOpt->ProfileUseFile.empty()));
+ MPM.addPass(PGOIndirectCallPromotion(
+ true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty() &&
+ !PGOOpt->ProfileUseFile.empty()));
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
@@ -814,8 +822,8 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// left by the earlier promotion pass that promotes intra-module targets.
// This two-step promotion is to save the compile time. For LTO, it should
// produce the same result as if we only do promotion here.
- MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */,
- PGOOpt && PGOOpt->SamplePGO));
+ MPM.addPass(PGOIndirectCallPromotion(
+ true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
@@ -868,7 +876,8 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// valuable as the inliner doesn't currently care whether it is inlining an
// invoke or a call.
// Run the inliner now.
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(InlinerPass()));
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+ InlinerPass(getInlineParamsFromOptLevel(Level))));
// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());
diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp
index 4534e086b39e..8c5f136ea270 100644
--- a/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -54,26 +54,26 @@ Counter CounterExpressionBuilder::get(const CounterExpression &E) {
return Counter::getExpression(I);
}
-void CounterExpressionBuilder::extractTerms(
- Counter C, int Sign, SmallVectorImpl<std::pair<unsigned, int>> &Terms) {
+void CounterExpressionBuilder::extractTerms(Counter C, int Factor,
+ SmallVectorImpl<Term> &Terms) {
switch (C.getKind()) {
case Counter::Zero:
break;
case Counter::CounterValueReference:
- Terms.push_back(std::make_pair(C.getCounterID(), Sign));
+ Terms.emplace_back(C.getCounterID(), Factor);
break;
case Counter::Expression:
const auto &E = Expressions[C.getExpressionID()];
- extractTerms(E.LHS, Sign, Terms);
- extractTerms(E.RHS, E.Kind == CounterExpression::Subtract ? -Sign : Sign,
- Terms);
+ extractTerms(E.LHS, Factor, Terms);
+ extractTerms(
+ E.RHS, E.Kind == CounterExpression::Subtract ? -Factor : Factor, Terms);
break;
}
}
Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
// Gather constant terms.
- SmallVector<std::pair<unsigned, int>, 32> Terms;
+ SmallVector<Term, 32> Terms;
extractTerms(ExpressionTree, +1, Terms);
// If there are no terms, this is just a zero. The algorithm below assumes at
@@ -82,17 +82,15 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
return Counter::getZero();
// Group the terms by counter ID.
- std::sort(Terms.begin(), Terms.end(),
- [](const std::pair<unsigned, int> &LHS,
- const std::pair<unsigned, int> &RHS) {
- return LHS.first < RHS.first;
+ std::sort(Terms.begin(), Terms.end(), [](const Term &LHS, const Term &RHS) {
+ return LHS.CounterID < RHS.CounterID;
});
// Combine terms by counter ID to eliminate counters that sum to zero.
auto Prev = Terms.begin();
for (auto I = Prev + 1, E = Terms.end(); I != E; ++I) {
- if (I->first == Prev->first) {
- Prev->second += I->second;
+ if (I->CounterID == Prev->CounterID) {
+ Prev->Factor += I->Factor;
continue;
}
++Prev;
@@ -103,24 +101,24 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
Counter C;
// Create additions. We do this before subtractions to avoid constructs like
// ((0 - X) + Y), as opposed to (Y - X).
- for (auto Term : Terms) {
- if (Term.second <= 0)
+ for (auto T : Terms) {
+ if (T.Factor <= 0)
continue;
- for (int I = 0; I < Term.second; ++I)
+ for (int I = 0; I < T.Factor; ++I)
if (C.isZero())
- C = Counter::getCounter(Term.first);
+ C = Counter::getCounter(T.CounterID);
else
C = get(CounterExpression(CounterExpression::Add, C,
- Counter::getCounter(Term.first)));
+ Counter::getCounter(T.CounterID)));
}
// Create subtractions.
- for (auto Term : Terms) {
- if (Term.second >= 0)
+ for (auto T : Terms) {
+ if (T.Factor >= 0)
continue;
- for (int I = 0; I < -Term.second; ++I)
+ for (int I = 0; I < -T.Factor; ++I)
C = get(CounterExpression(CounterExpression::Subtract, C,
- Counter::getCounter(Term.first)));
+ Counter::getCounter(T.CounterID)));
}
return C;
}
@@ -247,18 +245,6 @@ Error CoverageMapping::loadFunctionRecord(
return Error::success();
}
-Expected<std::unique_ptr<CoverageMapping>>
-CoverageMapping::load(CoverageMappingReader &CoverageReader,
- IndexedInstrProfReader &ProfileReader) {
- auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
-
- for (const auto &Record : CoverageReader)
- if (Error E = Coverage->loadFunctionRecord(Record, ProfileReader))
- return std::move(E);
-
- return std::move(Coverage);
-}
-
Expected<std::unique_ptr<CoverageMapping>> CoverageMapping::load(
ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
IndexedInstrProfReader &ProfileReader) {
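[Editor's illustration, not part of the patch] The simplify() changes above replace the raw (counter ID, sign) pairs with a named Term struct carrying a CounterID and a signed Factor; the algorithm itself is unchanged: sort terms by counter, sum the factors per counter, and drop counters that cancel to zero before emitting additions and subtractions. A tiny self-contained sketch of that combining step, with a hypothetical TermSketch type standing in for the real Term:

    #include <algorithm>
    #include <vector>

    // Hypothetical stand-in for the Term type introduced above.
    struct TermSketch {
      unsigned CounterID;
      int Factor; // +1 per addition, -1 per subtraction of this counter
    };

    // Merge terms that refer to the same counter so that counters whose
    // factors sum to zero drop out entirely.
    std::vector<TermSketch> combineTerms(std::vector<TermSketch> Terms) {
      std::sort(Terms.begin(), Terms.end(),
                [](const TermSketch &L, const TermSketch &R) {
                  return L.CounterID < R.CounterID;
                });
      std::vector<TermSketch> Combined;
      for (const TermSketch &T : Terms) {
        if (!Combined.empty() && Combined.back().CounterID == T.CounterID)
          Combined.back().Factor += T.Factor;
        else
          Combined.push_back(T);
      }
      Combined.erase(std::remove_if(Combined.begin(), Combined.end(),
                                    [](const TermSketch &T) {
                                      return T.Factor == 0;
                                    }),
                     Combined.end());
      return Combined;
    }

For instance, the terms {(#1,+1), (#2,+1), (#1,-1)} combine to just {(#2,+1)}, which is why counters that appear once added and once subtracted vanish from the simplified expression tree.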
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 005061c4f068..a1d18724fcd5 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -504,9 +504,11 @@ void InstrProfRecord::mergeValueProfData(uint32_t ValueKind,
SIPE.addError(instrprof_error::value_site_count_mismatch);
return;
}
+ if (!ThisNumValueSites)
+ return;
std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
- getValueSitesForKind(ValueKind);
- std::vector<InstrProfValueSiteRecord> &OtherSiteRecords =
+ getOrCreateValueSitesForKind(ValueKind);
+ MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
Src.getValueSitesForKind(ValueKind);
for (uint32_t I = 0; I < ThisNumValueSites; I++)
ThisSiteRecords[I].merge(SIPE, OtherSiteRecords[I], Weight);
@@ -533,11 +535,8 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) {
}
void InstrProfRecord::scaleValueProfData(uint32_t ValueKind, uint64_t Weight) {
- uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
- std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
- getValueSitesForKind(ValueKind);
- for (uint32_t I = 0; I < ThisNumValueSites; I++)
- ThisSiteRecords[I].scale(SIPE, Weight);
+ for (auto &R : getValueSitesForKind(ValueKind))
+ R.scale(SIPE, Weight);
}
void InstrProfRecord::scale(uint64_t Weight) {
@@ -583,7 +582,7 @@ void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site,
VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap);
}
std::vector<InstrProfValueSiteRecord> &ValueSites =
- getValueSitesForKind(ValueKind);
+ getOrCreateValueSitesForKind(ValueKind);
if (N == 0)
ValueSites.emplace_back();
else
@@ -642,8 +641,9 @@ static ValueProfRecordClosure InstrProfRecordClosure = {
// Wrapper implementation using the closure mechanism.
uint32_t ValueProfData::getSize(const InstrProfRecord &Record) {
- InstrProfRecordClosure.Record = &Record;
- return getValueProfDataSize(&InstrProfRecordClosure);
+ auto Closure = InstrProfRecordClosure;
+ Closure.Record = &Record;
+ return getValueProfDataSize(&Closure);
}
// Wrapper implementation using the closure mechanism.
diff --git a/lib/Support/AMDGPUCodeObjectMetadata.cpp b/lib/Support/AMDGPUCodeObjectMetadata.cpp
index a00e371415a3..863093ab7def 100644
--- a/lib/Support/AMDGPUCodeObjectMetadata.cpp
+++ b/lib/Support/AMDGPUCodeObjectMetadata.cpp
@@ -20,8 +20,6 @@
using namespace llvm::AMDGPU;
using namespace llvm::AMDGPU::CodeObject;
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 234f7439a546..232efe648b03 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -327,6 +327,7 @@ enum ProcessorSubtypes {
INTEL_COREI7_SKYLAKE_AVX512,
INTEL_ATOM_BONNELL,
INTEL_ATOM_SILVERMONT,
+ INTEL_ATOM_GOLDMONT,
INTEL_KNIGHTS_LANDING,
AMDPENTIUM_K6,
AMDPENTIUM_K62,
@@ -707,7 +708,12 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
*Type = INTEL_ATOM;
*Subtype = INTEL_ATOM_SILVERMONT;
break; // "silvermont"
-
+ // Goldmont:
+ case 0x5c:
+ case 0x5f:
+ *Type = INTEL_ATOM;
+ *Subtype = INTEL_ATOM_GOLDMONT;
+ break; // "goldmont"
case 0x57:
*Type = INTEL_XEONPHI; // knl
*Subtype = INTEL_KNIGHTS_LANDING;
@@ -1070,6 +1076,8 @@ StringRef sys::getHostCPUName() {
switch (Subtype) {
case INTEL_ATOM_BONNELL:
return "bonnell";
+ case INTEL_ATOM_GOLDMONT:
+ return "goldmont";
case INTEL_ATOM_SILVERMONT:
return "silvermont";
default:
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 227e792d83dc..85e782b2c048 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -240,11 +240,9 @@ getMemoryBufferForStream(int FD, const Twine &BufferName) {
// Read into Buffer until we hit EOF.
do {
Buffer.reserve(Buffer.size() + ChunkSize);
- ReadBytes = read(FD, Buffer.end(), ChunkSize);
- if (ReadBytes == -1) {
- if (errno == EINTR) continue;
+ ReadBytes = sys::RetryAfterSignal(-1, read, FD, Buffer.end(), ChunkSize);
+ if (ReadBytes == -1)
return std::error_code(errno, std::generic_category());
- }
Buffer.set_size(Buffer.size() + ReadBytes);
} while (ReadBytes != 0);
@@ -391,13 +389,12 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
while (BytesLeft) {
#ifdef HAVE_PREAD
- ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset);
+ ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft,
+ MapSize - BytesLeft + Offset);
#else
- ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+ ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft);
#endif
if (NumRead == -1) {
- if (errno == EINTR)
- continue;
// Error while reading.
return std::error_code(errno, std::generic_category());
}
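The MemoryBuffer, Path.inc and Process.inc hunks in this commit all replace hand-rolled EINTR loops with sys::RetryAfterSignal. A minimal sketch of the pattern such a helper implements is below; it captures the general idea (retry while the call fails with EINTR) and is not necessarily the exact definition shipped in llvm/Support/Errno.h:

#include <cerrno>

// Sketch: invoke F(As...) and retry while it returns the designated failure
// value with errno == EINTR, i.e. the underlying system call was interrupted
// by a signal. Any other failure is handed straight back to the caller.
template <typename FailT, typename Fun, typename... Args>
auto retryAfterSignal(const FailT &Fail, const Fun &F, const Args &... As)
    -> decltype(F(As...)) {
  decltype(F(As...)) Res;
  do
    Res = F(As...);
  while (Res == Fail && errno == EINTR);
  return Res;
}

With a helper of this shape, retryAfterSignal(-1, read, FD, Buf, Size) behaves like the old "if (errno == EINTR) continue;" loops while keeping the retry policy in one place.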
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index b16351906a4c..13bb6f23bc83 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -784,6 +784,42 @@ unsigned llvm::ARM::parseArchVersion(StringRef Arch) {
return 0;
}
+StringRef llvm::ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
+ StringRef ArchName =
+ CPU.empty() ? TT.getArchName() : ARM::getArchName(ARM::parseCPUArch(CPU));
+
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getEnvironment() == Triple::EABI ||
+ TT.getOS() == Triple::UnknownOS ||
+ llvm::ARM::parseArchProfile(ArchName) == ARM::PK_M)
+ return "aapcs";
+ if (TT.isWatchABI())
+ return "aapcs16";
+ return "apcs-gnu";
+ } else if (TT.isOSWindows())
+ // FIXME: this is invalid for WindowsCE.
+ return "aapcs";
+
+ // Select the default based on the platform.
+ switch (TT.getEnvironment()) {
+ case Triple::Android:
+ case Triple::GNUEABI:
+ case Triple::GNUEABIHF:
+ case Triple::MuslEABI:
+ case Triple::MuslEABIHF:
+ return "aapcs-linux";
+ case Triple::EABIHF:
+ case Triple::EABI:
+ return "aapcs";
+ default:
+ if (TT.isOSNetBSD())
+ return "apcs-gnu";
+ if (TT.isOSOpenBSD())
+ return "aapcs-linux";
+ return "aapcs";
+ }
+}
+
StringRef llvm::AArch64::getCanonicalArchName(StringRef Arch) {
return ARM::getCanonicalArchName(Arch);
}
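For reference, a hedged usage sketch of the new helper; it assumes the declaration is exposed from llvm/Support/TargetParser.h as the definition here suggests, and the expected strings follow directly from the switch above:

#include "llvm/ADT/Triple.h"
#include "llvm/Support/TargetParser.h"
#include <cstdio>

int main() {
  // GNUEABIHF triples map to "aapcs-linux", Windows triples to "aapcs",
  // per the cases in computeDefaultTargetABI above.
  llvm::Triple Linux("armv7-unknown-linux-gnueabihf");
  llvm::Triple Win("thumbv7-unknown-windows-msvc");
  std::printf("%s\n",
              llvm::ARM::computeDefaultTargetABI(Linux, /*CPU=*/"").str().c_str());
  std::printf("%s\n",
              llvm::ARM::computeDefaultTargetABI(Win, /*CPU=*/"").str().c_str());
  return 0;
}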
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index b6774692595b..45097eb918b7 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -737,10 +737,8 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD,
#ifdef O_CLOEXEC
OpenFlags |= O_CLOEXEC;
#endif
- while ((ResultFD = open(P.begin(), OpenFlags)) < 0) {
- if (errno != EINTR)
- return std::error_code(errno, std::generic_category());
- }
+ if ((ResultFD = sys::RetryAfterSignal(-1, open, P.begin(), OpenFlags)) < 0)
+ return std::error_code(errno, std::generic_category());
#ifndef O_CLOEXEC
int r = fcntl(ResultFD, F_SETFD, FD_CLOEXEC);
(void)r;
@@ -800,10 +798,8 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
SmallString<128> Storage;
StringRef P = Name.toNullTerminatedStringRef(Storage);
- while ((ResultFD = open(P.begin(), OpenFlags, Mode)) < 0) {
- if (errno != EINTR)
- return std::error_code(errno, std::generic_category());
- }
+ if ((ResultFD = sys::RetryAfterSignal(-1, open, P.begin(), OpenFlags, Mode)) < 0)
+ return std::error_code(errno, std::generic_category());
#ifndef O_CLOEXEC
int r = fcntl(ResultFD, F_SETFD, FD_CLOEXEC);
(void)r;
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 1d0143c6716e..2d4662094682 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -207,13 +207,10 @@ std::error_code Process::FixupStandardFileDescriptors() {
for (int StandardFD : StandardFDs) {
struct stat st;
errno = 0;
- while (fstat(StandardFD, &st) < 0) {
+ if (RetryAfterSignal(-1, fstat, StandardFD, &st) < 0) {
assert(errno && "expected errno to be set if fstat failed!");
// fstat should return EBADF if the file descriptor is closed.
- if (errno == EBADF)
- break;
- // retry fstat if we got EINTR, otherwise bubble up the failure.
- if (errno != EINTR)
+ if (errno != EBADF)
return std::error_code(errno, std::generic_category());
}
// if fstat succeeds, move on to the next FD.
@@ -222,11 +219,8 @@ std::error_code Process::FixupStandardFileDescriptors() {
assert(errno == EBADF && "expected errno to have EBADF at this point!");
if (NullFD < 0) {
- while ((NullFD = open("/dev/null", O_RDWR)) < 0) {
- if (errno == EINTR)
- continue;
+ if ((NullFD = RetryAfterSignal(-1, open, "/dev/null", O_RDWR)) < 0)
return std::error_code(errno, std::generic_category());
- }
}
if (NullFD == StandardFD)
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp
index f27bc97ec3f3..0a948812ff33 100644
--- a/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -22,7 +22,7 @@
/// cbz w8, .LBB1_2 -> b.eq .LBB1_2
///
/// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
-/// tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
+/// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2
///
//===----------------------------------------------------------------------===//
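Why PL rather than GE: tbz w8, #31 branches when the sign bit of the result is clear, which after the flag-setting subs is exactly N == 0 (PL). GE instead tests N == V and diverges whenever the subtraction overflows. A small self-contained check (plain C++, independent of LLVM) showing the divergence on one such input:

#include <cstdint>
#include <cstdio>

int main() {
  // subs w8, w0, w1 with w0 = INT32_MIN, w1 = 1 wraps to INT32_MAX.
  int32_t a = INT32_MIN, b = 1;
  int32_t res = (int32_t)((uint32_t)a - (uint32_t)b);          // 0x7fffffff
  bool N = res < 0;                                            // sign flag: clear
  bool V = ((a < 0) != (b < 0)) && ((res < 0) != (a < 0));     // overflow: set
  bool tbz31 = ((uint32_t)res >> 31) == 0;                     // tbz w8, #31 taken
  std::printf("tbz#31=%d  b.pl(N==0)=%d  b.ge(N==V)=%d\n",
              (int)tbz31, (int)!N, (int)(N == V));
  // Prints: tbz#31=1  b.pl(N==0)=1  b.ge(N==V)=0 -- so GE would mis-branch.
  return 0;
}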
@@ -129,11 +129,11 @@ MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) {
break;
case AArch64::TBZW:
case AArch64::TBZX:
- CC = AArch64CC::GE;
+ CC = AArch64CC::PL;
break;
case AArch64::TBNZW:
case AArch64::TBNZX:
- CC = AArch64CC::LT;
+ CC = AArch64CC::MI;
break;
}
return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc))
@@ -271,6 +271,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
}
break;
}
+ (void)NewCmp; (void)NewBr;
assert(NewCmp && NewBr && "Expected new instructions.");
DEBUG(dbgs() << " with instruction:\n ");
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 00a0111f2bd2..9eda56c825a9 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -139,6 +140,7 @@ class SSACCmpConv {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
+ const MachineBranchProbabilityInfo *MBPI;
public:
/// The first block containing a conditional branch, dominating everything
@@ -186,8 +188,10 @@ private:
public:
/// runOnMachineFunction - Initialize per-function data structures.
- void runOnMachineFunction(MachineFunction &MF) {
+ void runOnMachineFunction(MachineFunction &MF,
+ const MachineBranchProbabilityInfo *MBPI) {
this->MF = &MF;
+ this->MBPI = MBPI;
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
@@ -564,8 +568,40 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// All CmpBB instructions are moved into Head, and CmpBB is deleted.
// Update the CFG first.
updateTailPHIs();
- Head->removeSuccessor(CmpBB, true);
- CmpBB->removeSuccessor(Tail, true);
+
+ // Save successor probabilities before removing CmpBB and Tail from their
+ // parents.
+ BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
+ BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
+
+ Head->removeSuccessor(CmpBB);
+ CmpBB->removeSuccessor(Tail);
+
+ // If Head and CmpBB had successor probabilities, update the probabilities to
+ // reflect the ccmp-conversion.
+ if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
+
+ // Head is allowed two successors. We've removed CmpBB, so the remaining
+ // successor is Tail. We need to increase the successor probability for
+ // Tail to account for the CmpBB path we removed.
+ //
+ // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
+ assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
+ BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
+ Head->setSuccProbability(Head->succ_begin(),
+ Head2Tail + Head2CmpBB * CmpBB2Tail);
+
+ // We will transfer successors of CmpBB to Head in a moment without
+ // normalizing the successor probabilities. Set the successor probabilities
+ // before doing so.
+ //
+ // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
+ for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
+ BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
+ CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
+ }
+ }
+
Head->transferSuccessorsAndUpdatePHIs(CmpBB);
DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
TII->removeBranch(*Head);
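A worked instance of the bookkeeping above, using hypothetical numbers and plain arithmetic rather than the BranchProbability API: with Pr(CmpBB|Head) = 0.6, Pr(Tail|Head) = 0.4 and Pr(Tail|CmpBB) = 0.5, Tail should be reached from Head with probability 0.4 + 0.6 * 0.5 = 0.7 after the conversion, and each transferred successor of CmpBB is scaled by 0.6:

#include <cstdio>

int main() {
  // Hypothetical edge probabilities before ccmp-conversion.
  double Head2CmpBB = 0.6, Head2Tail = 0.4;
  double CmpBB2Tail = 0.5, CmpBB2Other = 0.5;

  // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB)
  double NewHead2Tail = Head2Tail + Head2CmpBB * CmpBB2Tail;   // 0.7
  // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB) for transferred successors
  double NewHead2Other = Head2CmpBB * CmpBB2Other;             // 0.3

  std::printf("Tail: %.2f  Other: %.2f  Sum: %.2f\n",
              NewHead2Tail, NewHead2Other, NewHead2Tail + NewHead2Other);
  return 0;
}

The sum staying at 1.0 is the point: the update keeps Head's outgoing probabilities normalized without an explicit renormalization step.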
@@ -717,6 +753,7 @@ int SSACCmpConv::expectedCodeSizeDelta() const {
namespace {
class AArch64ConditionalCompares : public MachineFunctionPass {
+ const MachineBranchProbabilityInfo *MBPI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MCSchedModel SchedModel;
@@ -753,6 +790,7 @@ char AArch64ConditionalCompares::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
"AArch64 CCMP Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
@@ -763,6 +801,7 @@ FunctionPass *llvm::createAArch64ConditionalCompares() {
}
void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -892,12 +931,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
MinSize = MF.getFunction()->optForMinSize();
bool Changed = false;
- CmpConv.runOnMachineFunction(MF);
+ CmpConv.runOnMachineFunction(MF, MBPI);
// Visit blocks in dominator tree pre-order. The pre-order enables multiple
// cmp-conversions from the same head block.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2965106fd270..aaf32a499bc3 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7561,8 +7561,9 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
-
+ SubVec = Builder.CreateIntToPtr(
+ SubVec, VectorType::get(SVI->getType()->getVectorElementType(),
+ VecTy->getVectorNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index ad24612239fa..6cb723d187af 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -735,7 +735,7 @@ def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;
// Multiply-add
-let AddedComplexity = 7 in {
+let AddedComplexity = 5 in {
defm MADD : MulAccum<0, "madd", add>;
defm MSUB : MulAccum<1, "msub", sub>;
@@ -752,7 +752,7 @@ def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
-} // AddedComplexity = 7
+} // AddedComplexity = 5
let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 9bfd570e9a82..07ce0e863c5e 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -947,7 +947,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
if (DstRB.getID() != SrcRB.getID()) {
- DEBUG(dbgs() << "G_TRUNC input/output on different banks\n");
+ DEBUG(dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
return false;
}
@@ -964,16 +964,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
- DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
return false;
}
if (DstRC == SrcRC) {
// Nothing to be done
+ } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
+ SrcTy == LLT::scalar(64)) {
+ llvm_unreachable("TableGen can import this case");
+ return false;
} else if (DstRC == &AArch64::GPR32RegClass &&
SrcRC == &AArch64::GPR64RegClass) {
I.getOperand(1).setSubReg(AArch64::sub_32);
} else {
+ DEBUG(dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
return false;
}
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 01196817f311..4b568f3fba2b 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -39,6 +39,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
+ for (auto Ty : {p0, s1, s8, s16, s32, s64})
+ setAction({G_IMPLICIT_DEF, Ty}, Legal);
+
for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
// These operations naturally get the right answer when used on
// GPR32, even if the actual type is narrower.
@@ -99,6 +102,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
// G_INSERT (It seems entirely reasonable that inputs shouldn't overlap).
}
+ for (auto Ty : {s1, s8, s16, s32, s64, p0})
+ setAction({G_EXTRACT, Ty}, Legal);
+
+ for (auto Ty : {s32, s64})
+ setAction({G_EXTRACT, 1, Ty}, Legal);
+
for (unsigned MemOp : {G_LOAD, G_STORE}) {
for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
setAction({MemOp, Ty}, Legal);
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 45083df7ab45..f82b9dbc2c9f 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -151,13 +151,24 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
return MCOperand::createExpr(Expr);
}
+MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+ return MCOperand::createExpr(Expr);
+}
+
MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
if (Printer.TM.getTargetTriple().isOSDarwin())
return lowerSymbolOperandDarwin(MO, Sym);
+ if (Printer.TM.getTargetTriple().isOSBinFormatCOFF())
+ return lowerSymbolOperandCOFF(MO, Sym);
- assert(Printer.TM.getTargetTriple().isOSBinFormatELF() &&
- "Expect Darwin or ELF target");
+ assert(Printer.TM.getTargetTriple().isOSBinFormatELF() && "Invalid target");
return lowerSymbolOperandELF(MO, Sym);
}
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index 1e29b80c2d62..aa30fe1fa707 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -42,6 +42,8 @@ public:
MCSymbol *Sym) const;
MCOperand lowerSymbolOperandELF(const MachineOperand &MO,
MCSymbol *Sym) const;
+ MCOperand lowerSymbolOperandCOFF(const MachineOperand &MO,
+ MCSymbol *Sym) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index baf15ac540cf..fab92e139dd0 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -94,7 +94,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
if (TT.isOSDarwin())
return CSR_AArch64_TLS_Darwin_RegMask;
- assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS");
+ assert(TT.isOSBinFormatELF() && "Invalid target");
return CSR_AArch64_TLS_ELF_RegMask;
}
diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 3654eeca530a..10df50bcf156 100644
--- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1,4 +1,4 @@
-//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=//
+//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -79,75 +79,207 @@ def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>;
// 60 entry unified scheduler.
def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2,
- THX2T99P3, THX2T99P4, THX2T99P5]> {
- let BufferSize=60;
+ THX2T99P3, THX2T99P4, THX2T99P5]> {
+ let BufferSize = 60;
}
// Define commonly used write types for InstRW specializations.
// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>.
// 3 cycles on I1.
-def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; }
+def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// 1 cycle on I2.
+def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
// 4 cycles on I1.
-def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; }
+def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// 23 cycles on I1.
+def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 23;
+ let ResourceCycles = [13, 23];
+ let NumMicroOps = 4;
+}
+
+// 39 cycles on I1.
+def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 39;
+ let ResourceCycles = [13, 39];
+ let NumMicroOps = 4;
+}
// 1 cycle on I0, I1, or I2.
-def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; }
+def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 2 cycles on I0, I1, or I2.
+def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on I0, I1, or I2.
+def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on I0, I1, or I2.
+def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
// 5 cycles on F1.
-def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
// 7 cycles on F1.
-def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
// 4 cycles on F0 or F1.
-def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
// 5 cycles on F0 or F1.
-def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
// 6 cycles on F0 or F1.
-def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
// 7 cycles on F0 or F1.
-def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
// 8 cycles on F0 or F1.
-def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; }
+def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 10 cycles on F0 or F1.
+def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
// 16 cycles on F0 or F1.
def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
+ let NumMicroOps = 3;
let ResourceCycles = [8];
}
// 23 cycles on F0 or F1.
def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 23;
+ let NumMicroOps = 3;
let ResourceCycles = [11];
}
// 1 cycles on LS0 or LS1.
-def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; }
+def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 0;
+}
+
+// 1 cycle on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+// 1 cycle on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_1Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+
+// 2 cycles on LS0 or LS1.
+def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
// 4 cycles on LS0 or LS1.
-def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
// 5 cycles on LS0 or LS1.
-def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
// 6 cycles on LS0 or LS1.
-def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_4Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
// 5 cycles on LS0 or LS1 and I0, I1, or I2.
def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
let Latency = 5;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
-def THX2T99Write_6Cyc_LS01_I012_I012 :
+def THX2T99Write_5Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_6Cyc_LS01_I012_I012 :
SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
let Latency = 6;
let NumMicroOps = 3;
@@ -162,25 +294,25 @@ def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
// 5 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 5;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 6 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 6;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 7 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 7;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 8 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 8;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// Define commonly used read types.
@@ -195,10 +327,8 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
-
}
-
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
@@ -211,88 +341,217 @@ let SchedModel = ThunderX2T99Model in {
// Branch, immed
// Branch and link, immed
// Compare and branch
-def : WriteRes<WriteBr, [THX2T99I2]> { let Latency = 1; }
+def : WriteRes<WriteBr, [THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Branch, register
+// Branch and link, register != LR
+// Branch and link, register = LR
+def : WriteRes<WriteBrReg, [THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
def : WriteRes<WriteSys, []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint, []> { let Latency = 1; }
-def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteAtomic, []> {
+ let Unsupported = 1;
+ let NumMicroOps = 2;
+}
-// Branch, register
-// Branch and link, register != LR
-// Branch and link, register = LR
-def : WriteRes<WriteBrReg, [THX2T99I2]> { let Latency = 1; }
+//---
+// Branch
+//---
+def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>;
+def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>;
+def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B.*")>;
+def : InstRW<[THX2T99Write_1Cyc_I2],
+ (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---
+
// ALU, basic
// Conditional compare
// Conditional select
// Address generation
-def : WriteRes<WriteI, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteI, [THX2T99I012]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 3];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[WriteI],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)",
+ "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)",
+ "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)",
+ "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
+ "CSNEG?(W|X)r(i|r|s|x)")>;
+
def : InstRW<[WriteI], (instrs COPY)>;
// ALU, extend and/or shift
def : WriteRes<WriteISReg, [THX2T99I012]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ResourceCycles = [2, 3];
+ let NumMicroOps = 2;
}
+def : InstRW<[WriteISReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)",
+ "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)",
+ "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)",
+ "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
+ "CSNEG?(W|X)r(i|r|s|x)")>;
+
def : WriteRes<WriteIEReg, [THX2T99I012]> {
- let Latency = 2;
- let ResourceCycles = [2];
+ let Latency = 1;
+ let ResourceCycles = [1, 3];
+ let NumMicroOps = 2;
}
+def : InstRW<[WriteIEReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)",
+ "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)",
+ "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)",
+ "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
+ "CSNEG?(W|X)r(i|r|s|x)")>;
+
// Move immed
-def : WriteRes<WriteImm, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteImm, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
// Variable shift
-def : WriteRes<WriteIS, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteIS, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
//---
// 3.4 Divide and Multiply Instructions
//---
// Divide, W-form
-// Latency range of 13-23. Take the average.
+// Latency range of 13-23/13-39.
def : WriteRes<WriteID32, [THX2T99I1]> {
- let Latency = 18;
- let ResourceCycles = [18];
+ let Latency = 39;
+ let ResourceCycles = [13, 39];
+ let NumMicroOps = 4;
}
// Divide, X-form
-// Latency range of 13-39. Take the average.
def : WriteRes<WriteID64, [THX2T99I1]> {
- let Latency = 26;
- let ResourceCycles = [26];
+ let Latency = 23;
+ let ResourceCycles = [13, 23];
+ let NumMicroOps = 4;
}
// Multiply accumulate, W-form
-def : WriteRes<WriteIM32, [THX2T99I012]> { let Latency = 5; }
+def : WriteRes<WriteIM32, [THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
// Multiply accumulate, X-form
-def : WriteRes<WriteIM64, [THX2T99I012]> { let Latency = 5; }
+def : WriteRes<WriteIM64, [THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012],
+// (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[THX2T99Write_5Cyc_I012],
+ (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+
+def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
+def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
// Bitfield extract, two reg
-def : WriteRes<WriteExtr, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteExtr, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Multiply high
+def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>;
+
+// Miscellaneous Data-Processing Instructions
+// Bitfield extract
+def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move - basic
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
-// Bitfield move, basic
// Bitfield move, insert
-// NOTE: Handled by WriteIS.
+def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>;
+def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>;
// Count leading
def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
- "^CLZ(W|X)r$")>;
+ "^CLZ(W|X)r$")>;
+
+// Reverse bits
+def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>;
+
+// Cryptography Extensions
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1[CMP]")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>;
+
+// CRC Instructions
+// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>;
+def : InstRW<[THX2T99Write_4Cyc_I1],
+ (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>;
+
+def : InstRW<[THX2T99Write_4Cyc_I1],
+ (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>;
// Reverse bits/bytes
// NOTE: Handled by WriteI.
//---
-// 3.6 Load Instructions
+// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---
@@ -300,13 +559,29 @@ def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
-def : WriteRes<WriteLD, [THX2T99LS01]> { let Latency = 4; }
+def : WriteRes<WriteLD, [THX2T99LS01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
-def : WriteRes<WriteAdr, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteAdr, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Load pair, immed offset, normal
+// Load pair, immed offset, signed words, base != SP
+// Load pair, immed offset signed words, base = SP
+// LDP only breaks into *one* LS micro-op. Thus
+// the resources are handled by WriteLD.
+def : WriteRes<WriteLDHi, []> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+}
// Load register offset, basic
// Load register, register offset, scale by 4/8
@@ -324,23 +599,229 @@ def THX2T99ReadAdrBase : SchedReadVariant<[
SchedVar<NoSchedPred, [ReadDefault]>]>;
def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>;
-// Load pair, immed offset, normal
-// Load pair, immed offset, signed words, base != SP
-// Load pair, immed offset signed words, base = SP
-// LDP only breaks into *one* LS micro-op. Thus
-// the resources are handling by WriteLD.
-def : WriteRes<WriteLDHi, []> {
- let Latency = 5;
-}
-
// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>;
+
+//---
+// Prefetch
+//---
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>;
+
//--
-// 3.7 Store Instructions
+// 3.7 Store Instructions
// 3.11 FP Store Instructions
//--
@@ -382,6 +863,195 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
// Store pair, immed pre-index, X-form
// NOTE: Handled by WriteAdr, WriteSTP.
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+
//---
// 3.8 FP Data Processing Instructions
//---
@@ -389,28 +1059,95 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
// FP absolute value
// FP min/max
// FP negate
-def : WriteRes<WriteF, [THX2T99F01]> { let Latency = 5; }
+def : WriteRes<WriteF, [THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
// FP arithmetic
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
// FP compare
-def : WriteRes<WriteFCmp, [THX2T99F01]> { let Latency = 5; }
+def : WriteRes<WriteFCmp, [THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
-// FP divide, S-form
-// FP square root, S-form
-def : WriteRes<WriteFDiv, [THX2T99F01]> {
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFDiv, [THX2T99F01]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
let ResourceCycles = [8];
+ let NumMicroOps = 4;
}
+def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 23;
+ let ResourceCycles = [12];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 23;
+ let ResourceCycles = [12];
+ let NumMicroOps = 4;
+}
+
+// FP divide, S-form
+// FP square root, S-form
+def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>;
+def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSrr")>;
+
// FP divide, D-form
// FP square root, D-form
-def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
+def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>;
+def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDrr")>;
// FP multiply
// FP multiply accumulate
-def : WriteRes<WriteFMul, [THX2T99F01]> { let Latency = 6; }
+def : WriteRes<WriteFMul, [THX2T99F01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>;
+def : InstRW<[THX2T99XWriteFMulAcc],
+ (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>;
// FP round to integral
def : InstRW<[THX2T99Write_7Cyc_F01],
@@ -426,15 +1163,25 @@ def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>;
// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
-def : WriteRes<WriteFCvt, [THX2T99F01]> { let Latency = 7; }
+def : WriteRes<WriteFCvt, [THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
// FP move, immed
// FP move, register
-def : WriteRes<WriteFImm, [THX2T99F01]> { let Latency = 4; }
+def : WriteRes<WriteFImm, [THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
-def : WriteRes<WriteFCopy, [THX2T99F01]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
//---
@@ -470,19 +1217,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX2T99F01]> { let Latency = 7; }
+def : WriteRes<WriteV, [THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4, 23];
+}
// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B
-def : InstRW<[THX2T99Write_5Cyc_F01],
- (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
// ASIMD logical (MOV, MVN, ORN, ORR)
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^ANDv", "^BICv", "^EORv", "^MOVv", "^MVNv",
+ "^ORRv", "^ORNv", "^NOTv")>;
+// ASIMD arith, reduce
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
// ASIMD polynomial (8x8) multiply long
-def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABAL")>;
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B/16H
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU]?ADDL?Vv16i8v$")>;
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B/16H
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+// ASIMD multiply, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^(P?MUL|SQR?DMULH)" #
+ "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
+ "(_indexed)?$")>;
+// ASIMD multiply, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD shift accumulate
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv",
+ "SQSHRNv","SQSHRUNv", "UQRSHRNv",
+ "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
+// ASIMD shift by immed, complex
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU][QR]{1,2}SHL" #
+ "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD Arithmetic
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
+ "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
+ "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLPv","^UADDLPv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv",
+ "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
+ "^SRHADD", "^SUBHNv", "^SUQADD",
+ "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^CMEQv","^CMGEv","^CMGTv",
+ "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv",
+ "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>;
//---
// 3.13 ASIMD Floating-point Instructions
@@ -493,7 +1356,8 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>;
// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
-def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FABDv", "^FADDv", "^FSUBv")>;
// ASIMD FP arith, pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
@@ -503,8 +1367,15 @@ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>;
// ASIMD FP compare, Q-form
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
- "^FCMGTv", "^FCMLEv",
- "^FCMLTv")>;
+ "^FCMGTv", "^FCMLEv",
+ "^FCMLTv")>;
+
+// ASIMD FP round, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
// ASIMD FP convert, long
// ASIMD FP convert, narrow
@@ -512,14 +1383,26 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
// ASIMD FP convert, other, Q-form
// NOTE: Handled by WriteV.
+// ASIMD FP convert, long and narrow
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
// ASIMD FP divide, D-form, F32
def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>;
// ASIMD FP divide, Q-form, F32
def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>;
// ASIMD FP divide, Q-form, F64
def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>;
// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
@@ -540,20 +1423,24 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP negate
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>;
-// ASIMD FP round, D-form
-// ASIMD FP round, Q-form
-// NOTE: Handled by WriteV.
-
//--
// 3.14 ASIMD Miscellaneous Instructions
//--
@@ -563,37 +1450,66 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^BIFv", "^BITv", "^BSLv")>;
// ASIMD count, D-form
// ASIMD count, Q-form
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^CLSv", "^CLZv", "^CNTv")>;
// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>;
// ASIMD extract
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>;
// ASIMD extract narrow
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>;
+
// ASIMD extract narrow, saturating
-// NOTE: Handled by WriteV.
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
// ASIMD insert, element to element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
+// ASIMD transfer, element to gen reg
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>;
+
// ASIMD move, integer immed
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
// ASIMD move, FP immed
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
+// ASIMD table lookup, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>;
+
+// ASIMD table lookup, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>;
+
+// ASIMD transpose
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>;
+
+// ASIMD unzip/zip
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
+
// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
-def : InstRW<[THX2T99Write_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
- "^FRSQRTEv", "^URSQRTEv")>;
+ "^FRSQRTEv", "^URSQRTEv")>;
// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
@@ -602,7 +1518,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
// ASIMD reverse
-def : InstRW<[THX2T99Write_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^REV16v", "^REV32v", "^REV64v")>;
// ASIMD table lookup, D-form
@@ -610,135 +1526,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
// ASIMD transfer, element to word or word
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>;
// ASIMD transfer, element to gen reg
-def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>;
// ASIMD transfer gen reg to element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
// ASIMD transpose
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
- "^UZP1v", "^UZP2v")>;
+ "^UZP1v", "^UZP2v")>;
// ASIMD unzip/zip
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
//--
-// 3.15 ASIMD Load Instructions
+// 3.15 ASIMD Load Instructions
//--
// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[THX2T99Write_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[THX2T99Write_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[THX2T99Write_5Cyc_LS01],
+def : InstRW<[THX2T99Write_5Cyc_LS01],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[THX2T99Write_6Cyc_LS01],
+def : InstRW<[THX2T99Write_6Cyc_LS01],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1i(8|16|32|64)_POST$")>;
// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2i(8|16|32|64)_POST$")>;
// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3i(8|16|32|64)_POST$")>;
// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
-def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4i(8|16|32|64)_POST$")>;
// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
-def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
//--
@@ -747,106 +1663,83 @@ def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST1i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3i(8|16|32|64)_POST$")>;
// ASIMD store, 4 element, multiple, D-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 4 element, one lane, B/H
// ASIMD store, 4 element, one lane, S
// ASIMD store, 4 element, one lane, D
def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4i(8|16|32|64)_POST$")>;
-//--
-// 3.17 Cryptography Extensions
-//--
-
-// Crypto AES ops
-def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>;
-
-// Crypto polynomial (64x64) multiply long
-def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
-
-// Crypto SHA1 xor ops
-// Crypto SHA1 schedule acceleration ops
-// Crypto SHA256 schedule acceleration op (1 u-op)
-// Crypto SHA256 schedule acceleration op (2 u-ops)
-// Crypto SHA256 hash acceleration ops
-def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>;
-
-//--
-// 3.18 CRC
-//--
-
-// CRC checksum ops
-def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>;
-
} // SchedModel = ThunderX2T99Model
+
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 6660f0babb8a..1252f9403812 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -167,6 +167,8 @@ extern "C" void LLVMInitializeAArch64Target() {
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
return llvm::make_unique<AArch64_MachoTargetObjectFile>();
+ if (TT.isOSBinFormatCOFF())
+ return llvm::make_unique<AArch64_COFFTargetObjectFile>();
return llvm::make_unique<AArch64_ELFTargetObjectFile>();
}
@@ -179,6 +181,8 @@ static std::string computeDataLayout(const Triple &TT,
return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
if (TT.isOSBinFormatMachO())
return "e-m:o-i64:64-i128:128-n32:64-S128";
+ if (TT.isOSBinFormatCOFF())
+ return "e-m:w-i64:64-i128:128-n32:64-S128";
if (LittleEndian)
return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 2c75a3258c1c..fefa7e26b79f 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -36,6 +36,7 @@ public:
~AArch64TargetMachine() override;
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
+ const AArch64Subtarget *getSubtargetImpl() const = delete;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 47e3bce43f6e..9077eb7902fd 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -45,6 +45,9 @@ public:
const TargetMachine &TM) const override;
};
+/// This implementation is used for AArch64 COFF targets.
+class AArch64_COFFTargetObjectFile : public TargetLoweringObjectFileCOFF {};
+
} // end namespace llvm
#endif
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a4328682b93c..a76f080530bb 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -20,6 +20,23 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64tti"
+static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
+ cl::init(true), cl::Hidden);
+
+bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ // Inline a callee if its target-features are a subset of the caller's
+ // target-features.
+ return (CallerBits & CalleeBits) == CalleeBits;
+}
+
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
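The subset test in the areInlineCompatible hunk above is plain bitset arithmetic. A minimal sketch of the same check, with std::bitset standing in for FeatureBitset and made-up feature positions:

  #include <bitset>
  #include <cassert>
  int main() {
    // Caller enables features {0,1,2}; callee only requires {0,1}.
    std::bitset<8> CallerBits("00000111");
    std::bitset<8> CalleeBits("00000011");
    // The callee's bits are a subset of the caller's, so inlining is allowed.
    assert((CallerBits & CalleeBits) == CalleeBits);
    // If the callee also required feature 3, the AND would clear that bit
    // and the comparison would fail, rejecting the inline.
  }

In other words, inlining is permitted only when the callee does not rely on any target feature the caller lacks.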
@@ -631,10 +648,62 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
return ST->getMaxInterleaveFactor();
}
-void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
+// For Falkor, we want to avoid having too many strided loads in a loop since
+// that can exhaust the HW prefetcher resources. We adjust the unroller
+// MaxCount preference below to attempt to ensure unrolling doesn't create too
+// many strided loads.
+static void
+getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ enum { MaxStridedLoads = 7 };
+ auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
+ int StridedLoads = 0;
+ // FIXME? We could make this more precise by looking at the CFG and
+ // e.g. not counting loads in each side of an if-then-else diamond.
+ for (const auto BB : L->blocks()) {
+ for (auto &I : *BB) {
+ LoadInst *LMemI = dyn_cast<LoadInst>(&I);
+ if (!LMemI)
+ continue;
+
+ Value *PtrValue = LMemI->getPointerOperand();
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE.getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
+ continue;
+
+ // FIXME? We could take pairing of unrolled load copies into account
+ // by looking at the AddRec, but we would probably have to limit this
+ // to loops with no stores or other memory optimization barriers.
+ ++StridedLoads;
+ // We've seen enough strided loads that seeing more won't make a
+ // difference.
+ if (StridedLoads > MaxStridedLoads / 2)
+ return StridedLoads;
+ }
+ }
+ return StridedLoads;
+ };
+
+ int StridedLoads = countStridedLoads(L, SE);
+ DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
+ << " strided loads\n");
+ // Pick the largest power of 2 unroll count that won't result in too many
+ // strided loads.
+ if (StridedLoads) {
+ UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
+ DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " << UP.MaxCount
+ << '\n');
+ }
+}
+
+void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Enable partial unrolling and runtime unrolling.
- BaseT::getUnrollingPreferences(L, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP);
// For inner loop, it is more likely to be a hot one, and the runtime check
// can be promoted out from LICM pass, so the overhead is less, let's try
@@ -644,6 +713,10 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
+
+ if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
+ EnableFalkorHWPFUnrollFix)
+ getFalkorUnrollingPreferences(L, SE, UP);
}
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
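To make the MaxCount clamp above concrete, here is a small worked sketch of the same arithmetic (illustrative values only; MaxStridedLoads is 7 in the patch, and Log2_32 is LLVM's floor-of-log2 helper):

  // StridedLoads = 1:  7 / 1 = 7, Log2_32(7) = 2, MaxCount = 1 << 2 = 4
  // StridedLoads = 3:  7 / 3 = 2, Log2_32(2) = 1, MaxCount = 1 << 1 = 2
  // StridedLoads = 4:  7 / 4 = 1, Log2_32(1) = 0, MaxCount = 1 << 0 = 1  (no unrolling headroom)

Because countStridedLoads stops counting once it has seen more than MaxStridedLoads / 2 loads, the count never exceeds 4 here, so the division never yields zero. If the heuristic misbehaves on a particular workload, the hidden flag added above should allow it to be switched off, e.g. by passing -enable-falkor-hwpf-unroll-fix=false to llc or opt.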
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 290a1ca1f24b..31c037354925 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -51,6 +51,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
+
/// \name Scalar TTI Implementations
/// @{
@@ -119,7 +122,8 @@ public:
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 3d075018904c..475f91016840 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -541,14 +541,13 @@ public:
return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32);
}
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
};
-void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCFixup &Fixup,
- const MCValue &Target,
- bool &IsResolved) {
+bool ELFAArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
// The ADRP instruction adds some multiple of 0x1000 to the current PC &
// ~0xfff. This means that the required offset to reach a symbol can vary by
// up to one step depending on where the ADRP is in memory. For example:
@@ -562,11 +561,24 @@ void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
// section isn't 0x1000-aligned, we therefore need to delegate this decision
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
- IsResolved = false;
+ return true;
+ return false;
}
}
+namespace {
+class COFFAArch64AsmBackend : public AArch64AsmBackend {
+public:
+ COFFAArch64AsmBackend(const Target &T, const Triple &TheTriple)
+ : AArch64AsmBackend(T, /*IsLittleEndian*/true) {}
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createAArch64WinCOFFObjectWriter(OS);
+ }
+};
+}
+
MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
@@ -575,7 +587,11 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
if (TheTriple.isOSBinFormatMachO())
return new DarwinAArch64AsmBackend(T, MRI);
- assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
+ if (TheTriple.isOSBinFormatCOFF())
+ return new COFFAArch64AsmBackend(T, TheTriple);
+
+ assert(TheTriple.isOSBinFormatELF() && "Invalid target");
+
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
bool IsILP32 = Options.getABIName() == "ilp32";
return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true, IsILP32);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index f7dda92fb551..89c3e5b4c76e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -49,10 +49,11 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
/*HasRelocationAddend*/ true),
IsILP32(IsILP32) {}
-#define R_CLS(rtype) \
- IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
-#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\
- "supported (LP64 eqv: " #lp64rtype ")"
+#define R_CLS(rtype) \
+ IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
+#define BAD_ILP32_MOV(lp64rtype) \
+ "ILP32 absolute MOV relocation not " \
+ "supported (LP64 eqv: " #lp64rtype ")"
// assumes IsILP32 is true
static bool isNonILP32reloc(const MCFixup &Fixup,
@@ -60,44 +61,45 @@ static bool isNonILP32reloc(const MCFixup &Fixup,
MCContext &Ctx) {
if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw)
return false;
- switch(RefKind) {
- case AArch64MCExpr::VK_ABS_G3:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3));
- return true;
- case AArch64MCExpr::VK_ABS_G2:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2));
- return true;
- case AArch64MCExpr::VK_ABS_G2_S:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2));
- return true;
- case AArch64MCExpr::VK_ABS_G2_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC));
- return true;
- case AArch64MCExpr::VK_ABS_G1_S:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1));
- return true;
- case AArch64MCExpr::VK_ABS_G1_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC));
- return true;
- case AArch64MCExpr::VK_DTPREL_G2:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2));
- return true;
- case AArch64MCExpr::VK_DTPREL_G1_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC));
- return true;
- case AArch64MCExpr::VK_TPREL_G2:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2));
- return true;
- case AArch64MCExpr::VK_TPREL_G1_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC));
- return true;
- case AArch64MCExpr::VK_GOTTPREL_G1:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1));
- return true;
- case AArch64MCExpr::VK_GOTTPREL_G0_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC));
- return true;
- default: return false;
+ switch (RefKind) {
+ case AArch64MCExpr::VK_ABS_G3:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2_S:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC));
+ return true;
+ case AArch64MCExpr::VK_ABS_G1_S:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1));
+ return true;
+ case AArch64MCExpr::VK_ABS_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC));
+ return true;
+ case AArch64MCExpr::VK_DTPREL_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2));
+ return true;
+ case AArch64MCExpr::VK_DTPREL_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC));
+ return true;
+ case AArch64MCExpr::VK_TPREL_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2));
+ return true;
+ case AArch64MCExpr::VK_TPREL_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC));
+ return true;
+ case AArch64MCExpr::VK_GOTTPREL_G1:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1));
+ return true;
+ case AArch64MCExpr::VK_GOTTPREL_G0_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC));
+ return true;
+ default:
+ return false;
}
return false;
}
@@ -130,7 +132,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(PREL32);
case FK_Data_8:
if (IsILP32) {
- Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte PC relative data "
+ Ctx.reportError(Fixup.getLoc(),
+ "ILP32 8 byte PC relative data "
"relocation not supported (LP64 eqv: PREL64)");
return ELF::R_AARCH64_NONE;
} else
@@ -178,7 +181,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
}
} else {
if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx))
- return ELF::R_AARCH64_NONE;
+ return ELF::R_AARCH64_NONE;
switch ((unsigned)Fixup.getKind()) {
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
@@ -189,8 +192,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(ABS32);
case FK_Data_8:
if (IsILP32) {
- Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte absolute data "
- "relocation not supported (LP64 eqv: ABS64)");
+ Ctx.reportError(Fixup.getLoc(),
+ "ILP32 8 byte absolute data "
+ "relocation not supported (LP64 eqv: ABS64)");
return ELF::R_AARCH64_NONE;
} else
return ELF::R_AARCH64_ABS64;
@@ -262,7 +266,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else {
Ctx.reportError(Fixup.getLoc(),
"LP64 4 byte unchecked GOT load/store relocation "
- "not supported (ILP32 eqv: LD32_GOT_LO12_NC");
+ "not supported (ILP32 eqv: LD32_GOT_LO12_NC");
return ELF::R_AARCH64_NONE;
}
}
@@ -270,12 +274,12 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsILP32) {
Ctx.reportError(Fixup.getLoc(),
"ILP32 4 byte checked GOT load/store relocation "
- "not supported (unchecked eqv: LD32_GOT_LO12_NC)");
+ "not supported (unchecked eqv: LD32_GOT_LO12_NC)");
} else {
Ctx.reportError(Fixup.getLoc(),
"LP64 4 byte checked GOT load/store relocation "
- "not supported (unchecked/ILP32 eqv: "
- "LD32_GOT_LO12_NC)");
+ "not supported (unchecked/ILP32 eqv: "
+ "LD32_GOT_LO12_NC)");
}
return ELF::R_AARCH64_NONE;
}
@@ -283,7 +287,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsILP32) {
return ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC;
} else {
- Ctx.reportError(Fixup.getLoc(), "LP64 32-bit load/store "
+ Ctx.reportError(Fixup.getLoc(),
+ "LP64 32-bit load/store "
"relocation not supported (ILP32 eqv: "
"TLSIE_LD32_GOTTPREL_LO12_NC)");
return ELF::R_AARCH64_NONE;
@@ -295,14 +300,14 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else {
Ctx.reportError(Fixup.getLoc(),
"LP64 4 byte TLSDESC load/store relocation "
- "not supported (ILP32 eqv: TLSDESC_LD64_LO12)");
+ "not supported (ILP32 eqv: TLSDESC_LD64_LO12)");
return ELF::R_AARCH64_NONE;
}
}
Ctx.reportError(Fixup.getLoc(),
"invalid fixup for 32-bit load/store instruction "
- "fixup_aarch64_ldst_imm12_scale4");
+ "fixup_aarch64_ldst_imm12_scale4");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_ldst_imm12_scale8:
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
@@ -312,8 +317,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AARCH64_LD64_GOT_LO12_NC;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
- "relocation not supported (LP64 eqv: "
- "LD64_GOT_LO12_NC)");
+ "relocation not supported (LP64 eqv: "
+ "LD64_GOT_LO12_NC)");
return ELF::R_AARCH64_NONE;
}
}
@@ -330,8 +335,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
- "relocation not supported (LP64 eqv: "
- "TLSIE_LD64_GOTTPREL_LO12_NC)");
+ "relocation not supported (LP64 eqv: "
+ "TLSIE_LD64_GOTTPREL_LO12_NC)");
return ELF::R_AARCH64_NONE;
}
}
@@ -340,8 +345,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AARCH64_TLSDESC_LD64_LO12;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
- "relocation not supported (LP64 eqv: "
- "TLSDESC_LD64_LO12)");
+ "relocation not supported (LP64 eqv: "
+ "TLSDESC_LD64_LO12)");
return ELF::R_AARCH64_NONE;
}
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 031aa8b81e35..a0de3c39562b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetStreamer.h"
+#include "AArch64WinCOFFStreamer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
@@ -30,6 +31,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
@@ -210,6 +212,8 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
if (TT.isOSBinFormatELF())
return new AArch64TargetELFStreamer(S);
+ if (TT.isOSBinFormatCOFF())
+ return new AArch64TargetWinCOFFStreamer(S);
return nullptr;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
index 0f5b765c7697..4293dcba955e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -16,53 +16,47 @@ namespace llvm {
namespace AArch64 {
enum Fixups {
- // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into
- // an ADR instruction.
+ // A 21-bit pc-relative immediate inserted into an ADR instruction.
fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind,
- // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into
- // an ADRP instruction.
+ // A 21-bit pc-relative immediate inserted into an ADRP instruction.
fixup_aarch64_pcrel_adrp_imm21,
- // fixup_aarch64_imm12 - 12-bit fixup for add/sub instructions.
- // No alignment adjustment. All value bits are encoded.
+ // 12-bit fixup for add/sub instructions. No alignment adjustment. All value
+ // bits are encoded.
fixup_aarch64_add_imm12,
- // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and
- // store instructions.
+ // Unsigned 12-bit fixups for load and store instructions.
fixup_aarch64_ldst_imm12_scale1,
fixup_aarch64_ldst_imm12_scale2,
fixup_aarch64_ldst_imm12_scale4,
fixup_aarch64_ldst_imm12_scale8,
fixup_aarch64_ldst_imm12_scale16,
- // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative
- // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is used by
- // pc-relative loads and generates relocations directly when necessary.
+ // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as
+ // fixup_aarch64_pcrel_adrhi, except this is used by pc-relative loads and
+ // generates relocations directly when necessary.
fixup_aarch64_ldr_pcrel_imm19,
// FIXME: comment
fixup_aarch64_movw,
- // fixup_aarch64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative
- // immediate.
+ // The high 14 bits of a 21-bit pc-relative immediate.
fixup_aarch64_pcrel_branch14,
- // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative
- // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is use by
- // b.cc and generates relocations directly when necessary.
+ // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as
+ // fixup_aarch64_pcrel_adrhi, except this is used by b.cc and generates
+ // relocations directly when necessary.
fixup_aarch64_pcrel_branch19,
- // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative
- // immediate.
+ // The high 26 bits of a 28-bit pc-relative immediate.
fixup_aarch64_pcrel_branch26,
- // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative
- // immediate. Distinguished from branch26 only on ELF.
+ // The high 26 bits of a 28-bit pc-relative immediate. Distinguished from
+ // branch26 only on ELF.
fixup_aarch64_pcrel_call26,
- // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF
- // R_AARCH64_TLSDESC_CALL relocation.
+ // Zero-space placeholder for the ELF R_AARCH64_TLSDESC_CALL relocation.
fixup_aarch64_tlsdesc_call,
// Marker
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 1b28df963b40..fc808ee0cdd6 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -100,3 +100,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
HasIdentDirective = true;
}
+
+AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() {
+ CommentString = ";";
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 253cd30f26ee..2d7107a37244 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
+#include "llvm/MC/MCAsmInfoCOFF.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
@@ -33,6 +34,10 @@ struct AArch64MCAsmInfoELF : public MCAsmInfoELF {
explicit AArch64MCAsmInfoELF(const Triple &T);
};
+struct AArch64MCAsmInfoCOFF : public MCAsmInfoCOFF {
+ explicit AArch64MCAsmInfoCOFF();
+};
+
} // namespace llvm
#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index f710065d9bc7..a2555496cdb9 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -14,6 +14,7 @@
#include "AArch64MCTargetDesc.h"
#include "AArch64ELFStreamer.h"
#include "AArch64MCAsmInfo.h"
+#include "AArch64WinCOFFStreamer.h"
#include "InstPrinter/AArch64InstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -59,8 +60,10 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI;
if (TheTriple.isOSBinFormatMachO())
MAI = new AArch64MCAsmInfoDarwin();
+ else if (TheTriple.isOSBinFormatCOFF())
+ MAI = new AArch64MCAsmInfoCOFF();
else {
- assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
+ assert(TheTriple.isOSBinFormatELF() && "Invalid target");
MAI = new AArch64MCAsmInfoELF(TheTriple);
}
@@ -74,8 +77,8 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
CodeModel::Model &CM) {
- assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()) &&
- "Only expect Darwin and ELF targets");
+ assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
+ TT.isOSBinFormatCOFF()) && "Invalid target");
if (CM == CodeModel::Default)
CM = CodeModel::Small;
@@ -122,6 +125,14 @@ static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
/*LabelSections*/ true);
}
+static MCStreamer *createWinCOFFStreamer(MCContext &Ctx, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible) {
+ return createAArch64WinCOFFStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+ IncrementalLinkerCompatible);
+}
+
static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) {
return new MCInstrAnalysis(Info);
}
@@ -154,6 +165,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
// Register the obj streamers.
TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer);
+ TargetRegistry::RegisterCOFFStreamer(*T, createWinCOFFStreamer);
// Register the obj target streamer.
TargetRegistry::RegisterObjectTargetStreamer(
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 615d7dab2c51..1404926b8124 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -60,6 +60,8 @@ MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
+MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS);
+
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..7862a03e771c
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -0,0 +1,65 @@
+//= AArch64WinCOFFObjectWriter.cpp - AArch64 Windows COFF Object Writer C++ =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+public:
+ AArch64WinCOFFObjectWriter()
+ : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) {
+ }
+
+ ~AArch64WinCOFFObjectWriter() override = default;
+
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsCrossSection,
+ const MCAsmBackend &MAB) const override;
+
+ bool recordRelocation(const MCFixup &) const override;
+};
+
+} // end anonymous namespace
+
+unsigned
+AArch64WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const {
+ const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
+ report_fatal_error(Twine("unsupported relocation type: ") + Info.Name);
+}
+
+bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
+ return true;
+}
+
+namespace llvm {
+
+MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) {
+ MCWinCOFFObjectTargetWriter *MOTW = new AArch64WinCOFFObjectWriter();
+ return createWinCOFFObjectWriter(MOTW, OS);
+}
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
new file mode 100644
index 000000000000..6c8da27e398f
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -0,0 +1,37 @@
+//===-- AArch64WinCOFFStreamer.cpp - AArch64 WinCOFF Streamer ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64WinCOFFStreamer.h"
+
+using namespace llvm;
+
+namespace {
+
+class AArch64WinCOFFStreamer : public MCWinCOFFStreamer {
+public:
+ friend class AArch64TargetWinCOFFStreamer;
+
+ AArch64WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, CE, OS) {}
+};
+} // end anonymous namespace
+
+namespace llvm {
+MCWinCOFFStreamer
+*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible) {
+ auto *S = new AArch64WinCOFFStreamer(Context, MAB, *Emitter, OS);
+ S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
+ return S;
+}
+
+} // end llvm namespace
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
new file mode 100644
index 000000000000..1b4fcd6804e2
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -0,0 +1,43 @@
+//===-- AArch64WinCOFFStreamer.h - WinCOFF Streamer for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements WinCOFF streamer information for the AArch64 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H
+
+#include "AArch64TargetStreamer.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
+
+namespace {
+class AArch64WinCOFFStreamer;
+
+class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer {
+private:
+ AArch64WinCOFFStreamer &getStreamer();
+
+public:
+ AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S)
+ : AArch64TargetStreamer(S) {}
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+MCWinCOFFStreamer
+*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible);
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
index 6d8be5e63fbb..56eeba8a1d4b 100644
--- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,8 @@ add_llvm_library(LLVMAArch64Desc
AArch64MCTargetDesc.cpp
AArch64MachObjectWriter.cpp
AArch64TargetStreamer.cpp
+ AArch64WinCOFFObjectWriter.cpp
+ AArch64WinCOFFStreamer.cpp
)
add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 55d18c3f3646..5a799b2d88d0 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -36,7 +36,6 @@ FunctionPass *createR600ControlFlowFinalizer();
FunctionPass *createAMDGPUCFGStructurizerPass();
// SI Passes
-FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 7494e5decd6f..f1d899c4d003 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -262,8 +262,8 @@ def FeatureSDWAMac : SubtargetFeature<"sdwa-mav",
"Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
>;
-def FeatureSDWAClampVOPC : SubtargetFeature<"sdwa-clamp-vopc",
- "HasSDWAClampVOPC",
+def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc",
+ "HasSDWAOutModsVOPC",
"true",
"Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
>;
@@ -452,7 +452,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
- FeatureSDWA, FeatureSDWAClampVOPC, FeatureSDWAMac, FeatureDPP
+ FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP
]
>;
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 2071b6f157cd..9a391d06c9ea 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -1776,7 +1776,7 @@ static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
E = EndMBB->succ_end();
PI != E; ++PI) {
// Either we have a back-edge to the entry block, or a back-edge to the
- // succesor of the entry block since the block may be split.
+ // successor of the entry block since the block may be split.
if ((*PI) != StartMBB &&
!((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
Succs.insert(
@@ -1831,7 +1831,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
IfBB->addSuccessor(CodeBBStart);
DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
- // Ensure that the MergeBB is a succesor of the CodeEndBB.
+ // Ensure that the MergeBB is a successor of the CodeEndBB.
if (!CodeBBEnd->isSuccessor(MergeBB))
CodeBBEnd->addSuccessor(MergeBB);
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index ab5abf2039a5..be47b900c6f0 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -128,7 +128,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasSDWAScalar(false),
HasSDWASdst(false),
HasSDWAMac(false),
- HasSDWAClampVOPC(false),
+ HasSDWAOutModsVOPC(false),
HasDPP(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 2b16289c723e..22cede59086a 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -153,7 +153,7 @@ protected:
bool HasSDWAScalar;
bool HasSDWASdst;
bool HasSDWAMac;
- bool HasSDWAClampVOPC;
+ bool HasSDWAOutModsVOPC;
bool HasDPP;
bool FlatAddressSpace;
bool FlatInstOffsets;
@@ -452,8 +452,8 @@ public:
return HasSDWAMac;
}
- bool hasSDWAClampVOPC() const {
- return HasSDWAClampVOPC;
+ bool hasSDWAOutModsVOPC() const {
+ return HasSDWAOutModsVOPC;
}
/// \brief Returns the offset in bytes from the start of the input buffer
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 04fe9f689806..425fd35d47de 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -720,7 +720,6 @@ bool GCNPassConfig::addPreISel() {
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
}
addPass(createSinkingPass());
- addPass(createSITypeRewriter());
addPass(createAMDGPUAnnotateUniformValues());
if (!LateCFGStructurize) {
addPass(createSIAnnotateControlFlowPass());
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 88245b01683a..89a03902dc69 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -63,7 +63,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
return false;
}
-void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
+void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Threshold = 300; // Twice the default.
UP.MaxCount = UINT_MAX;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 485e20411ab4..9a320bdfcc3d 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -68,7 +68,8 @@ public:
bool hasBranchDivergence() { return true; }
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index e30844f082cd..917d9cfa6905 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -96,7 +96,6 @@ add_llvm_target(AMDGPUCodeGen
SIPeepholeSDWA.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
- SITypeRewriter.cpp
SIWholeQuadMode.cpp
GCNIterativeScheduler.cpp
GCNMinRegStrategy.cpp
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 04308fb3aaf6..f26e49295e69 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -626,7 +626,9 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
using namespace AMDGPU::SDWA;
if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
- if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
+ // XXX: A static_cast<int> is needed to avoid a tautological-compare
+ // warning: comparing SRC_VGPR_MIN with an unsigned value is always true.
+ if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
Val <= SDWA9EncValues::SRC_VGPR_MAX) {
return createRegOperand(getVgprClassId(Width),
Val - SDWA9EncValues::SRC_VGPR_MIN);
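Editor's note: the cast above exists only to silence a tautological-comparison warning, since SRC_VGPR_MIN is zero and Val is unsigned. A minimal standalone sketch of that situation (not part of the patch; the MIN/MAX values are assumed for illustration):

  #include <cstdint>
  #include <cstdio>

  static constexpr int SRC_VGPR_MIN = 0;   // assumed value, for illustration only
  static constexpr int SRC_VGPR_MAX = 255; // assumed value, for illustration only

  static bool isVgprEncoding(uint32_t Val) {
    // Without the cast, "SRC_VGPR_MIN <= Val" compares 0 against an unsigned
    // value and is always true, which compilers flag as a tautological compare.
    return SRC_VGPR_MIN <= static_cast<int>(Val) && Val <= SRC_VGPR_MAX;
  }

  int main() {
    std::printf("%d %d\n", isVgprEncoding(5), isVgprEncoding(300));
    return 0;
  }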
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index d0f4e00994de..d39b345bdf03 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4314,6 +4314,23 @@ SDValue SITargetLowering::splitBinaryBitConstantOp(
return SDValue();
}
+// Returns true if the argument is a boolean value that is not serialized into
+// memory or an argument and does not require v_cndmask_b32 to be deserialized.
+static bool isBoolSGPR(SDValue V) {
+ if (V.getValueType() != MVT::i1)
+ return false;
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::SETCC:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case AMDGPUISD::FP_CLASS:
+ return true;
+ }
+ return false;
+}
+
SDValue SITargetLowering::performAndCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.isBeforeLegalize())
@@ -4402,6 +4419,16 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
}
}
+ if (VT == MVT::i32 &&
+ (RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
+ // and x, (sext cc from i1) => select cc, x, 0
+ if (RHS.getOpcode() != ISD::SIGN_EXTEND)
+ std::swap(LHS, RHS);
+ if (isBoolSGPR(RHS.getOperand(0)))
+ return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0),
+ LHS, DAG.getConstant(0, SDLoc(N), MVT::i32));
+ }
+
return SDValue();
}
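Editor's note: a small standalone sketch (plain C++, not DAG code) of the identity behind the "and x, (sext cc from i1) => select cc, x, 0" combine added above. Sign-extending an i1 yields 0 or all-ones, so the AND behaves like a select:

  #include <cassert>
  #include <cstdint>

  static uint32_t viaAnd(uint32_t X, bool CC) {
    // sext i1 -> i32: false becomes 0, true becomes 0xFFFFFFFF.
    uint32_t Mask = static_cast<uint32_t>(-static_cast<int32_t>(CC));
    return X & Mask;
  }

  static uint32_t viaSelect(uint32_t X, bool CC) { return CC ? X : 0; }

  int main() {
    for (uint32_t X : {0u, 1u, 0xdeadbeefu})
      for (bool CC : {false, true})
        assert(viaAnd(X, CC) == viaSelect(X, CC));
    return 0;
  }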
@@ -4941,8 +4968,7 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
auto Cond = RHS.getOperand(0);
- if (Cond.getOpcode() != ISD::SETCC &&
- Cond.getOpcode() != AMDGPUISD::FP_CLASS)
+ if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
@@ -5109,6 +5135,35 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = LHS.getValueType();
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ auto CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (!CRHS) {
+ CRHS = dyn_cast<ConstantSDNode>(LHS);
+ if (CRHS) {
+ std::swap(LHS, RHS);
+ CC = getSetCCSwappedOperands(CC);
+ }
+ }
+
+ if (CRHS && VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
+ isBoolSGPR(LHS.getOperand(0))) {
+ // (setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1
+ // (setcc (sext from i1 cc), -1, eq|sle|uge) => cc
+ // (setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
+ // (setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
+ if ((CRHS->isAllOnesValue() &&
+ (CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
+ (CRHS->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
+ return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
+ DAG.getConstant(-1, SL, MVT::i1));
+ if ((CRHS->isAllOnesValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
+ (CRHS->isNullValue() &&
+ (CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
+ return LHS.getOperand(0);
+ }
if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
VT != MVT::f16))
@@ -5116,7 +5171,6 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
// Match isinf pattern
// (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
if (!CRHS)
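Editor's note: the equivalences listed in the comments of the setcc combine above can be checked directly; here is a small standalone verification (not compiler code) that comparing "sext i1 cc" against -1 or 0 reduces to either cc or its negation:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (bool CC : {false, true}) {
      int32_t S = CC ? -1 : 0;  // sext from i1
      assert((S != -1) == !CC); // setcc(..., -1, ne) -> not cc
      assert((S == -1) ==  CC); // setcc(..., -1, eq) -> cc
      assert((S ==  0) == !CC); // setcc(...,  0, eq) -> not cc
      assert((S !=  0) ==  CC); // setcc(...,  0, ne) -> cc
    }
    return 0;
  }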
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index c9b48fea7225..b6784ec14e9f 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -770,7 +770,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (ST.hasScalarStores()) {
// m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
+ Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
}
return;
@@ -871,7 +871,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (ST.hasScalarStores()) {
// m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
+ Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
}
return;
@@ -2444,8 +2444,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
- if ( DstIdx == -1)
- DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst);
const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
@@ -2488,14 +2486,20 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
return false;
}
- } else if (!ST.hasSDWAClampVOPC()) {
+ } else if (!ST.hasSDWAOutModsVOPC()) {
// No clamp allowed on GFX9 for VOPC
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
- if (Clamp != nullptr &&
- (!Clamp->isImm() || Clamp->getImm() != 0)) {
+ if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
return false;
}
+
+ // No omod allowed on GFX9 for VOPC
+ const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
+ ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
+ return false;
+ }
}
}
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 3b4a8b5d1e81..4a81fb3b463a 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -336,6 +336,10 @@ def NegSubInlineConst16 : ImmLeaf<i16, [{
return Imm < -16 && Imm >= -64;
}], NegateImm>;
+def ShiftAmt32Imm : PatLeaf <(imm), [{
+ return N->getZExtValue() < 32;
+}]>;
+
//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 3b4bdc864253..bcc685015cf5 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -929,6 +929,14 @@ def : UMad24Pat<V_MAD_U32_U24>;
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
+def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
+ (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+ (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
+
+def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
+ (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+ (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
+
/********** ====================== **********/
/********** Indirect addressing **********/
/********** ====================== **********/
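Editor's note: the two patterns added above rewrite "trunc (srl i64 x, c)" with c < 32 as V_ALIGNBIT_B32 over the two 32-bit halves. A minimal sketch of the underlying arithmetic (hypothetical helper, not the TableGen lowering itself):

  #include <cassert>
  #include <cstdint>

  static uint32_t alignbit(uint32_t Hi, uint32_t Lo, unsigned C) {
    // C is assumed to be in [0, 31], mirroring ShiftAmt32Imm / (and $src1, 31).
    if (C == 0)
      return Lo;
    return (Lo >> C) | (Hi << (32 - C));
  }

  int main() {
    uint64_t X = 0x0123456789abcdefULL;
    uint32_t Lo = static_cast<uint32_t>(X), Hi = static_cast<uint32_t>(X >> 32);
    for (unsigned C = 0; C < 32; ++C)
      assert(static_cast<uint32_t>(X >> C) == alignbit(Hi, Lo, C));
    return 0;
  }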
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 4ac23ef03cb3..e2ac6631d2f3 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -627,10 +627,13 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
return false;
}
- if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ if (!ST.hasSDWAOutModsVOPC() &&
+ (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
return false;
- } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
+ } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
+ !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
return false;
}
@@ -649,25 +652,24 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
assert(SDWAOpcode != -1);
- // Copy dst, if it is present in original then should also be present in SDWA
- MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (!Dst && !TII->isVOPC(MI))
- return false;
-
const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
// Create SDWA version of instruction MI and initialize its operands
MachineInstrBuilder SDWAInst =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
+ // Copy dst; if it is present in the original it should also be present in SDWA.
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
if (Dst) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
SDWAInst.add(*Dst);
- } else {
- Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
assert(Dst &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
SDWAInst.add(*Dst);
+ } else {
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
+ SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
}
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -714,20 +716,22 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
}
// Copy omod if present, initialize otherwise if needed
- MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
- if (OMod) {
- assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1);
- SDWAInst.add(*OMod);
- } else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
- SDWAInst.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
+ MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (OMod) {
+ SDWAInst.add(*OMod);
+ } else {
+ SDWAInst.addImm(0);
+ }
}
- // Initialize dst_sel and dst_unused if present
- if (Dst) {
- assert(
- AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
- AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
+ // Initialize dst_sel if present
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+ }
+
+ // Initialize dst_unused if present
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
}
diff --git a/lib/Target/AMDGPU/SITypeRewriter.cpp b/lib/Target/AMDGPU/SITypeRewriter.cpp
deleted file mode 100644
index aad68537f779..000000000000
--- a/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass removes performs the following type substitution on all
-/// non-compute shaders:
-///
-/// v16i8 => i128
-/// - v16i8 is used for constant memory resource descriptors. This type is
-/// legal for some compute APIs, and we don't want to declare it as legal
-/// in the backend, because we want the legalizer to expand all v16i8
-/// operations.
-/// v1* => *
-/// - Having v1* types complicates the legalizer and we can easily replace
-/// - them with the element type.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-
-using namespace llvm;
-
-namespace {
-
-class SITypeRewriter : public FunctionPass,
- public InstVisitor<SITypeRewriter> {
-
- static char ID;
- Module *Mod;
- Type *v16i8;
- Type *v4i32;
-
-public:
- SITypeRewriter() : FunctionPass(ID) { }
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
- StringRef getPassName() const override { return "SI Type Rewriter"; }
- void visitLoadInst(LoadInst &I);
- void visitCallInst(CallInst &I);
- void visitBitCast(BitCastInst &I);
-};
-
-} // End anonymous namespace
-
-char SITypeRewriter::ID = 0;
-
-bool SITypeRewriter::doInitialization(Module &M) {
- Mod = &M;
- v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
- v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4);
- return false;
-}
-
-bool SITypeRewriter::runOnFunction(Function &F) {
- if (!AMDGPU::isShader(F.getCallingConv()))
- return false;
-
- visit(F);
- visit(F);
-
- return false;
-}
-
-void SITypeRewriter::visitLoadInst(LoadInst &I) {
- Value *Ptr = I.getPointerOperand();
- Type *PtrTy = Ptr->getType();
- Type *ElemTy = PtrTy->getPointerElementType();
- IRBuilder<> Builder(&I);
- if (ElemTy == v16i8) {
- Value *BitCast = Builder.CreateBitCast(Ptr,
- PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
- LoadInst *Load = Builder.CreateLoad(BitCast);
- SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
- I.getAllMetadataOtherThanDebugLoc(MD);
- for (unsigned i = 0, e = MD.size(); i != e; ++i) {
- Load->setMetadata(MD[i].first, MD[i].second);
- }
- Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
- I.replaceAllUsesWith(BitCastLoad);
- I.eraseFromParent();
- }
-}
-
-void SITypeRewriter::visitCallInst(CallInst &I) {
- IRBuilder<> Builder(&I);
-
- SmallVector <Value*, 8> Args;
- SmallVector <Type*, 8> Types;
- bool NeedToReplace = false;
- Function *F = I.getCalledFunction();
- if (!F)
- return;
-
- std::string Name = F->getName();
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Value *Arg = I.getArgOperand(i);
- if (Arg->getType() == v16i8) {
- Args.push_back(Builder.CreateBitCast(Arg, v4i32));
- Types.push_back(v4i32);
- NeedToReplace = true;
- Name = Name + ".v4i32";
- } else if (Arg->getType()->isVectorTy() &&
- Arg->getType()->getVectorNumElements() == 1 &&
- Arg->getType()->getVectorElementType() ==
- Type::getInt32Ty(I.getContext())){
- Type *ElementTy = Arg->getType()->getVectorElementType();
- std::string TypeName = "i32";
- InsertElementInst *Def = cast<InsertElementInst>(Arg);
- Args.push_back(Def->getOperand(1));
- Types.push_back(ElementTy);
- std::string VecTypeName = "v1" + TypeName;
- Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
- NeedToReplace = true;
- } else {
- Args.push_back(Arg);
- Types.push_back(Arg->getType());
- }
- }
-
- if (!NeedToReplace) {
- return;
- }
- Function *NewF = Mod->getFunction(Name);
- if (!NewF) {
- NewF = Function::Create(FunctionType::get(F->getReturnType(), Types, false), GlobalValue::ExternalLinkage, Name, Mod);
- NewF->setAttributes(F->getAttributes());
- }
- I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
- I.eraseFromParent();
-}
-
-void SITypeRewriter::visitBitCast(BitCastInst &I) {
- IRBuilder<> Builder(&I);
- if (I.getDestTy() != v4i32) {
- return;
- }
-
- if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
- if (Op->getSrcTy() == v4i32) {
- I.replaceAllUsesWith(Op->getOperand(0));
- I.eraseFromParent();
- }
- }
-}
-
-FunctionPass *llvm::createSITypeRewriter() {
- return new SITypeRewriter();
-}
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 6f67183df6a1..c40b4450a5b5 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -222,6 +222,13 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true",
"Has return address stack">;
+// Some processors have no branch predictor, which changes the expected cost of
+// taking a branch and so affects the choice of whether to use predicated
+// instructions.
+def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor",
+ "HasBranchPredictor", "false",
+ "Has no branch predictor">;
+
/// DSP extension.
def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
"Supports DSP instructions in ARM and/or Thumb2">;
@@ -262,6 +269,10 @@ def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
"Generate calls via indirect call "
"instructions">;
+def FeatureExecuteOnly
+ : SubtargetFeature<"execute-only", "GenExecuteOnly", "true",
+ "Enable the generation of execute only code.">;
+
def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
"Reserve R9, making it unavailable as "
"GPR">;
@@ -540,7 +551,7 @@ def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
//
// Dummy CPU, used to target architectures
-def : ProcNoItin<"generic", []>;
+def : ProcessorModel<"generic", CortexA8Model, []>;
def : ProcNoItin<"arm8", [ARMv4]>;
def : ProcNoItin<"arm810", [ARMv4]>;
@@ -756,13 +767,19 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR]>;
-def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>;
-def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>;
+def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
+ ProcM3,
+ FeatureHasNoBranchPredictor]>;
+
+def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
+ ProcM3,
+ FeatureHasNoBranchPredictor]>;
-def : ProcNoItin<"cortex-m4", [ARMv7em,
+def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
FeatureVFP4,
FeatureVFPOnlySP,
- FeatureD16]>;
+ FeatureD16,
+ FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
FeatureFPARMv8,
@@ -771,11 +788,12 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
FeatureNoMovt]>;
-def : ProcNoItin<"cortex-m33", [ARMv8mMainline,
+def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
FeatureDSP,
FeatureFPARMv8,
FeatureD16,
- FeatureVFPOnlySP]>;
+ FeatureVFPOnlySP,
+ FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e0810c358f2d..1ec6b24b2ed6 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1851,9 +1851,9 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
}
bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &,
+isProfitableToIfCvt(MachineBasicBlock &TBB,
unsigned TCycles, unsigned TExtra,
- MachineBasicBlock &,
+ MachineBasicBlock &FBB,
unsigned FCycles, unsigned FExtra,
BranchProbability Probability) const {
if (!TCycles)
@@ -1863,14 +1863,43 @@ isProfitableToIfCvt(MachineBasicBlock &,
// Here we scale up each component of UnpredCost to avoid precision issue when
// scaling TCycles/FCycles by Probability.
const unsigned ScalingUpFactor = 1024;
- unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
- unsigned FUnpredCost =
+
+ unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
+ unsigned UnpredCost;
+ if (!Subtarget.hasBranchPredictor()) {
+ // When we don't have a branch predictor it's always cheaper to not take a
+ // branch than take it, so we have to take that into account.
+ unsigned NotTakenBranchCost = 1;
+ unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
+ unsigned TUnpredCycles, FUnpredCycles;
+ if (!FCycles) {
+ // Triangle: TBB is the fallthrough
+ TUnpredCycles = TCycles + NotTakenBranchCost;
+ FUnpredCycles = TakenBranchCost;
+ } else {
+ // Diamond: TBB is the block that is branched to, FBB is the fallthrough
+ TUnpredCycles = TCycles + TakenBranchCost;
+ FUnpredCycles = FCycles + NotTakenBranchCost;
+ }
+ // The total cost is the cost of each path scaled by its probability.
+ unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
+ unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
+ UnpredCost = TUnpredCost + FUnpredCost;
+ // When predicating, assume that the first IT can be folded away but later
+ // ones cost one cycle each.
+ if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
+ PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
+ }
+ } else {
+ unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+ unsigned FUnpredCost =
Probability.getCompl().scale(FCycles * ScalingUpFactor);
- unsigned UnpredCost = TUnpredCost + FUnpredCost;
- UnpredCost += 1 * ScalingUpFactor; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+ UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1 * ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+ }
- return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
+ return PredCost <= UnpredCost;
}
bool
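Editor's note: a worked sketch of the no-branch-predictor cost comparison introduced above, using floating point instead of the scaled integer math in the patch, with hypothetical cycle counts for a triangle (true block is the fallthrough):

  #include <cstdio>

  int main() {
    unsigned TCycles = 2, TExtra = 0;    // assumed if-converted block cost
    unsigned MispredictionPenalty = 2;   // taken-branch cost (as in CortexM3Model)
    double PTaken = 0.5;                 // assumed branch probability

    double TUnpredCycles = TCycles + 1;  // not-taken branch costs 1 cycle
    double FUnpredCycles = MispredictionPenalty;
    double UnpredCost = PTaken * TUnpredCycles + (1 - PTaken) * FUnpredCycles;
    double PredCost = TCycles + TExtra;  // both paths always executed

    std::printf("PredCost=%.1f UnpredCost=%.1f -> %s\n", PredCost, UnpredCost,
                PredCost <= UnpredCost ? "if-convert" : "keep branch");
    return 0;
  }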
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 2bcc707e9fc3..e42514acd76f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -7580,6 +7580,9 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
dl, MVT::i32);
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
+ if (isBigEndian)
+ std::swap(VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
@@ -7607,10 +7610,14 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
- Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
- SDValue(CmpSwap, 0)));
- Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
- SDValue(CmpSwap, 0)));
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
+
+ Results.push_back(
+ DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
+ SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
+ Results.push_back(
+ DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
+ SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
Results.push_back(SDValue(CmpSwap, 2));
}
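Editor's note: a small sketch of the register-pair ordering handled in the two hunks above. On big-endian targets the first register of the GPRPair (gsub_0) is assumed to carry the high half of the i64, so both building and extracting the pair swap halves; the struct and helpers below are illustrative only:

  #include <cassert>
  #include <cstdint>

  struct GPRPair { uint32_t Sub0, Sub1; }; // gsub_0, gsub_1

  static GPRPair buildPair(uint64_t V, bool BigEndian) {
    uint32_t Lo = static_cast<uint32_t>(V), Hi = static_cast<uint32_t>(V >> 32);
    if (BigEndian)
      return {Hi, Lo};
    return {Lo, Hi};
  }

  static uint64_t extractPair(GPRPair P, bool BigEndian) {
    uint32_t Lo = BigEndian ? P.Sub1 : P.Sub0;
    uint32_t Hi = BigEndian ? P.Sub0 : P.Sub1;
    return (static_cast<uint64_t>(Hi) << 32) | Lo;
  }

  int main() {
    for (bool BE : {false, true})
      assert(extractPair(buildPair(0x0011223344556677ULL, BE), BE) ==
             0x0011223344556677ULL);
    return 0;
  }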
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 423f97ccacd6..891a8f482f0a 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1416,12 +1416,12 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
isIndirectBranch = 1 in {
def tTBB_JT : tPseudoInst<(outs),
- (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
- Sched<[WriteBr]>;
+ (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
+ IIC_Br, []>, Sched<[WriteBr]>;
def tTBH_JT : tPseudoInst<(outs),
- (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
- Sched<[WriteBr]>;
+ (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
+ IIC_Br, []>, Sched<[WriteBr]>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 4cb0eca5ee5f..374176d1d737 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -46,6 +46,10 @@ private:
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) const;
+ bool selectSelect(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const;
+
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
const ARMBaseTargetMachine &TM;
@@ -346,6 +350,50 @@ bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB,
return true;
}
+bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const {
+ auto &MBB = *MIB->getParent();
+ auto InsertBefore = std::next(MIB->getIterator());
+ auto &DebugLoc = MIB->getDebugLoc();
+
+ // Compare the condition to 0.
+ auto CondReg = MIB->getOperand(1).getReg();
+ assert(MRI.getType(CondReg).getSizeInBits() == 1 &&
+ RBI.getRegBank(CondReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported types for select operation");
+ auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPri))
+ .addUse(CondReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ return false;
+
+ // Move a value into the result register based on the result of the
+ // comparison.
+ auto ResReg = MIB->getOperand(0).getReg();
+ auto TrueReg = MIB->getOperand(2).getReg();
+ auto FalseReg = MIB->getOperand(3).getReg();
+ assert(MRI.getType(ResReg) == MRI.getType(TrueReg) &&
+ MRI.getType(TrueReg) == MRI.getType(FalseReg) &&
+ MRI.getType(FalseReg).getSizeInBits() == 32 &&
+ RBI.getRegBank(TrueReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ RBI.getRegBank(FalseReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported types for select operation");
+ auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCr))
+ .addDef(ResReg)
+ .addUse(TrueReg)
+ .addUse(FalseReg)
+ .add(predOps(ARMCC::EQ, ARM::CPSR));
+ if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI))
+ return false;
+
+ MIB->eraseFromParent();
+ return true;
+}
+
bool ARMInstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -448,6 +496,8 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
}
case G_ICMP:
return selectICmp(MIB, TII, MRI, TRI, RBI);
+ case G_SELECT:
+ return selectSelect(MIB, TII, MRI, TRI, RBI);
case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 5873c7fb3872..f3e62d09cc30 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -55,10 +55,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
for (unsigned Op : {G_SDIV, G_UDIV}) {
for (auto Ty : {s8, s16})
- // FIXME: We need WidenScalar here, but in the case of targets with
- // software division we'll also need Libcall afterwards. Treat as Custom
- // until we have better support for chaining legalization actions.
- setAction({Op, Ty}, Custom);
+ setAction({Op, Ty}, WidenScalar);
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Legal);
else
@@ -84,6 +81,10 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_GEP, p0}, Legal);
setAction({G_GEP, 1, s32}, Legal);
+ setAction({G_SELECT, s32}, Legal);
+ setAction({G_SELECT, p0}, Legal);
+ setAction({G_SELECT, 1, s1}, Legal);
+
setAction({G_CONSTANT, s32}, Legal);
setAction({G_ICMP, s1}, Legal);
@@ -118,40 +119,6 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return false;
- case G_SDIV:
- case G_UDIV: {
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (Ty != LLT::scalar(16) && Ty != LLT::scalar(8))
- return false;
-
- // We need to widen to 32 bits and then maybe, if the target requires,
- // transform into a libcall.
- LegalizerHelper Helper(MIRBuilder.getMF());
-
- MachineInstr *NewMI = nullptr;
- Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
- // Store the new, 32-bit div instruction.
- if (MI->getOpcode() == G_SDIV || MI->getOpcode() == G_UDIV)
- NewMI = MI;
- });
-
- auto Result = Helper.widenScalar(MI, 0, LLT::scalar(32));
- Helper.MIRBuilder.stopRecordingInsertions();
- if (Result == LegalizerHelper::UnableToLegalize) {
- return false;
- }
- assert(NewMI && "Couldn't find widened instruction");
- assert((NewMI->getOpcode() == G_SDIV || NewMI->getOpcode() == G_UDIV) &&
- "Unexpected widened instruction");
- assert(MRI.getType(NewMI->getOperand(0).getReg()).getSizeInBits() == 32 &&
- "Unexpected type for the widened instruction");
-
- Result = Helper.legalizeInstrStep(*NewMI);
- if (Result == LegalizerHelper::UnableToLegalize) {
- return false;
- }
- return true;
- }
case G_SREM:
case G_UREM: {
unsigned OriginalResult = MI.getOperand(0).getReg();
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 2350d0c6ef69..11fb81a4f9fe 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -255,6 +255,18 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping =
getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
+ case G_SELECT: {
+ LLT Ty2 = MRI.getType(MI.getOperand(1).getReg());
+ (void)Ty2;
+ assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT");
+ assert(Ty2.getSizeInBits() == 1 && "Unsupported size for G_SELECT");
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]});
+ break;
+ }
case G_ICMP: {
LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
(void)Ty2;
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 02cbfb1fa9f1..b10583bc7983 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -245,6 +245,10 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
// the general GPR register class above (MOV, e.g.)
def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+// Thumb registers R0-R7 and the PC. Some instructions like TBB or TBH allow
+// the PC to be used as a destination operand as well.
+def tGPRwithpc : RegisterClass<"ARM", [i32], 32, (add tGPR, PC)>;
+
// The high registers in thumb mode, R8-R15.
def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 1c7902520f2d..53e012f13ee2 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -424,3 +424,4 @@ include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
include "ARMScheduleR52.td"
include "ARMScheduleA57.td"
+include "ARMScheduleM3.td"
diff --git a/lib/Target/ARM/ARMScheduleM3.td b/lib/Target/ARM/ARMScheduleM3.td
new file mode 100644
index 000000000000..93f8299f9bd0
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleM3.td
@@ -0,0 +1,21 @@
+//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-M3 processor.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexM3Model : SchedMachineModel {
+ let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue
+ let MicroOpBufferSize = 0; // In-order
+ let LoadLatency = 2; // Latency when not pipelined, not pc-relative
+ let MispredictPenalty = 2; // Best case branch taken cost
+
+ let CompleteModel = 0;
+}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index d9d0c27c6304..2c42a1336166 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -11,6 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "ARM.h"
+
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "ARMCallLowering.h"
+#include "ARMLegalizerInfo.h"
+#include "ARMRegisterBankInfo.h"
+#endif
#include "ARMSubtarget.h"
#include "ARMFrameLowering.h"
#include "ARMInstrInfo.h"
@@ -23,6 +30,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#endif
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -78,11 +92,6 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-/// EnableExecuteOnly - Enables the generation of execute-only code on supported
-/// targets
-static cl::opt<bool>
-EnableExecuteOnly("arm-execute-only");
-
ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
StringRef FS) {
ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
@@ -92,13 +101,41 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
return new ARMFrameLowering(STI);
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+
+struct ARMGISelActualAccessor : public GISelAccessor {
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+};
+
+} // end anonymous namespace
+#endif
+
ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
- GenExecuteOnly(EnableExecuteOnly), CPUString(CPU), IsLittle(IsLittle),
- TargetTriple(TT), Options(TM.Options), TM(TM),
- FrameLowering(initializeFrameLowering(CPU, FS)),
+ CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
+ TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
InstrInfo(isThumb1Only()
@@ -106,7 +143,29 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: !isThumb()
? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
- TLInfo(TM, *this) {}
+ TLInfo(TM, *this) {
+ assert((isThumb() || hasARMOps()) &&
+ "Target must either be thumb or support ARM operations!");
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
+ GISel->CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering()));
+ GISel->Legalizer.reset(new ARMLegalizerInfo(*this));
+
+ auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo());
+
+ // FIXME: At this point, we can't rely on Subtarget having RBI.
+ // It's awkward to mix passing RBI and the Subtarget; should we pass
+ // TII/TRI as well?
+ GISel->InstSelector.reset(createARMInstructionSelector(
+ *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI));
+
+ GISel->RegBankInfo.reset(RBI);
+#endif
+ setGISelAccessor(*GISel);
+}
const CallLowering *ARMSubtarget::getCallLowering() const {
assert(GISel && "Access to GlobalISel APIs not set");
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index d890d0fa777e..e15b17512c96 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -246,6 +246,11 @@ protected:
/// avoid issue "normal" call instructions to callees which do not return.
bool HasRetAddrStack = false;
+ /// HasBranchPredictor - True if the subtarget has a branch predictor. Having
+ /// a branch predictor or not changes the expected cost of taking a branch
+ /// which affects the choice of whether to use predicated instructions.
+ bool HasBranchPredictor = true;
+
/// HasMPExtension - True if the subtarget supports Multiprocessing
/// extension (ARMv7 only).
bool HasMPExtension = false;
@@ -554,6 +559,7 @@ public:
bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRetAddrStack() const { return HasRetAddrStack; }
+ bool hasBranchPredictor() const { return HasBranchPredictor; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index eb71e557ec91..c323a1d368de 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,11 +11,6 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMCallLowering.h"
-#include "ARMLegalizerInfo.h"
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-#include "ARMRegisterBankInfo.h"
-#endif
#include "ARMSubtarget.h"
#include "ARMMacroFusion.h"
#include "ARMTargetMachine.h"
@@ -29,7 +24,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDepsFix.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
@@ -110,60 +104,20 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static ARMBaseTargetMachine::ARMABI
computeTargetABI(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
- if (Options.MCOptions.getABIName() == "aapcs16")
+ StringRef ABIName = Options.MCOptions.getABIName();
+
+ if (ABIName.empty())
+ ABIName = ARM::computeDefaultTargetABI(TT, CPU);
+
+ if (ABIName == "aapcs16")
return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
- else if (Options.MCOptions.getABIName().startswith("aapcs"))
+ else if (ABIName.startswith("aapcs"))
return ARMBaseTargetMachine::ARM_ABI_AAPCS;
- else if (Options.MCOptions.getABIName().startswith("apcs"))
+ else if (ABIName.startswith("apcs"))
return ARMBaseTargetMachine::ARM_ABI_APCS;
- assert(Options.MCOptions.getABIName().empty() &&
- "Unknown target-abi option!");
-
- ARMBaseTargetMachine::ARMABI TargetABI =
- ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
-
- unsigned ArchKind = ARM::parseCPUArch(CPU);
- StringRef ArchName = ARM::getArchName(ArchKind);
- // FIXME: This is duplicated code from the front end and should be unified.
- if (TT.isOSBinFormatMachO()) {
- if (TT.getEnvironment() == Triple::EABI ||
- (TT.getOS() == Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
- ARM::parseArchProfile(ArchName) == ARM::PK_M) {
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- } else if (TT.isWatchABI()) {
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
- } else {
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
- }
- } else if (TT.isOSWindows()) {
- // FIXME: this is invalid for WindowsCE
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- } else {
- // Select the default based on the platform.
- switch (TT.getEnvironment()) {
- case Triple::Android:
- case Triple::GNUEABI:
- case Triple::GNUEABIHF:
- case Triple::MuslEABI:
- case Triple::MuslEABIHF:
- case Triple::EABIHF:
- case Triple::EABI:
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- break;
- case Triple::GNU:
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
- break;
- default:
- if (TT.isOSNetBSD())
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
- else
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- break;
- }
- }
-
- return TargetABI;
+ llvm_unreachable("Unhandled/unknown ABI Name!");
+ return ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -248,61 +202,39 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM,
OL),
TargetABI(computeTargetABI(TT, CPU, Options)),
- TLOF(createTLOF(getTargetTriple())),
- Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
+ TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) {
// Default to triple-appropriate float ABI
- if (Options.FloatABIType == FloatABI::Default)
- this->Options.FloatABIType =
- Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default) {
+ if (TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
+ TargetTriple.getEnvironment() == Triple::EABIHF ||
+ TargetTriple.isOSWindows() ||
+ TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
+ this->Options.FloatABIType = FloatABI::Hard;
+ else
+ this->Options.FloatABIType = FloatABI::Soft;
+ }
// Default to triple-appropriate EABI
if (Options.EABIVersion == EABI::Default ||
Options.EABIVersion == EABI::Unknown) {
// musl is compatible with glibc with regard to EABI version
- if (Subtarget.isTargetGNUAEABI() || Subtarget.isTargetMuslAEABI())
+ if ((TargetTriple.getEnvironment() == Triple::GNUEABI ||
+ TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ TargetTriple.getEnvironment() == Triple::MuslEABI ||
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF) &&
+ !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin()))
this->Options.EABIVersion = EABI::GNU;
else
this->Options.EABIVersion = EABI::EABI5;
}
initAsmInfo();
- if (!Subtarget.isThumb() && !Subtarget.hasARMOps())
- report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
- "support ARM mode execution!");
}
ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
-
-struct ARMGISelActualAccessor : public GISelAccessor {
- std::unique_ptr<CallLowering> CallLoweringInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
-
- const CallLowering *getCallLowering() const override {
- return CallLoweringInfo.get();
- }
-
- const InstructionSelector *getInstructionSelector() const override {
- return InstSelector.get();
- }
-
- const LegalizerInfo *getLegalizerInfo() const override {
- return Legalizer.get();
- }
-
- const RegisterBankInfo *getRegBankInfo() const override {
- return RegBankInfo.get();
- }
-};
-
-} // end anonymous namespace
-#endif
-
const ARMSubtarget *
ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -334,24 +266,6 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
-
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
- GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering()));
- GISel->Legalizer.reset(new ARMLegalizerInfo(*I));
-
- auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo());
-
- // FIXME: At this point, we can't rely on Subtarget having RBI.
- // It's awkward to mix passing RBI and the Subtarget; should we pass
- // TII/TRI as well?
- GISel->InstSelector.reset(createARMInstructionSelector(*this, *I, *RBI));
-
- GISel->RegBankInfo.reset(RBI);
-#endif
- I->setGISelAccessor(*GISel);
}
return I.get();
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 2fcee73228fe..f41da3e8e223 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -36,7 +36,6 @@ public:
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- ARMSubtarget Subtarget;
bool isLittle;
mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap;
@@ -47,8 +46,8 @@ public:
CodeGenOpt::Level OL, bool isLittle);
~ARMBaseTargetMachine() override;
- const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
+ const ARMSubtarget *getSubtargetImpl() const = delete;
bool isLittleEndian() const { return isLittle; }
/// \brief Get the TargetIRAnalysis for this target.
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index a5b27abeb27f..88bab64ffaf2 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -32,7 +32,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
const ARMBaseTargetMachine &ARM_TM = static_cast<const ARMBaseTargetMachine &>(TM);
bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS;
- genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
+ // genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -43,16 +43,6 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
AttributesSection =
getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
-
- // Make code section unreadable when in execute-only mode
- if (genExecuteOnly) {
- unsigned Type = ELF::SHT_PROGBITS;
- unsigned Flags = ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE;
- // Since we cannot modify flags for an existing section, we create a new
- // section with the right flags, and use 0 as the unique ID for
- // execute-only text
- TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U);
- }
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
@@ -74,21 +64,27 @@ getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
getContext());
}
-MCSection *
-ARMElfTargetObjectFile::getExplicitSectionGlobal(const GlobalObject *GO,
- SectionKind SK, const TargetMachine &TM) const {
+static bool isExecuteOnlyFunction(const GlobalObject *GO, SectionKind SK,
+ const TargetMachine &TM) {
+ if (const Function *F = dyn_cast<Function>(GO))
+ if (TM.getSubtarget<ARMSubtarget>(*F).genExecuteOnly() && SK.isText())
+ return true;
+ return false;
+}
+
+MCSection *ARMElfTargetObjectFile::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
// Set execute-only access for the explicit section
- if (genExecuteOnly && SK.isText())
+ if (isExecuteOnlyFunction(GO, SK, TM))
SK = SectionKind::getExecuteOnly();
return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM);
}
-MCSection *
-ARMElfTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
- SectionKind SK, const TargetMachine &TM) const {
+MCSection *ARMElfTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
// Place the global in the execute-only text section
- if (genExecuteOnly && SK.isText())
+ if (isExecuteOnlyFunction(GO, SK, TM))
SK = SectionKind::getExecuteOnly();
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, SK, TM);
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index dbb8128269dc..bd7aa1cfe02b 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -16,8 +16,6 @@
namespace llvm {
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
- mutable bool genExecuteOnly = false;
-
protected:
const MCSection *AttributesSection = nullptr;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 585726208a8d..5ab236b7fd4c 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -486,7 +486,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- Size = 0;
+ Size = 4;
return MCDisassembler::Fail;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 81760f03940a..22de728fe06e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -738,13 +738,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
}
}
-void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) {
+bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
const unsigned FixupKind = Fixup.getKind() ;
- if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
+ if ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
assert(Sym && "How did we resolve this?");
// If the symbol is external the linker will handle it.
@@ -753,7 +753,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// If the symbol is out of range, produce a relocation and hope the
// linker can handle it. GNU AS produces an error in this case.
if (Sym->isExternal())
- IsResolved = false;
+ return true;
}
// Create relocations for unconditional branches to function symbols with
// different execution mode in ELF binaries.
@@ -761,12 +761,12 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType();
if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) {
if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch))
- IsResolved = false;
+ return true;
if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br ||
FixupKind == ARM::fixup_arm_thumb_bl ||
FixupKind == ARM::fixup_t2_condbranch ||
FixupKind == ARM::fixup_t2_uncondbranch))
- IsResolved = false;
+ return true;
}
}
// We must always generate a relocation for BL/BLX instructions if we have
@@ -776,7 +776,8 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
FixupKind == ARM::fixup_arm_blx ||
FixupKind == ARM::fixup_arm_uncondbl ||
FixupKind == ARM::fixup_arm_condbl))
- IsResolved = false;
+ return true;
+ return false;
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 6a0ba2ed41c1..84b54bbb9a49 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -38,10 +38,8 @@ public:
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
- /// processFixupValue - Target hook to process the literal value of a fixup
- /// if necessary.
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, uint64_t Value, bool IsPCRel,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 9f6c5d7bf920..831589ba0581 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -15,55 +15,47 @@
namespace llvm {
namespace ARM {
enum Fixups {
- // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
- // addresses
+ // 12-bit PC relative relocation for symbol addresses
fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
- // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
- // the 16-bit halfwords reordered.
+ // Equivalent to fixup_arm_ldst_pcrel_12, with the 16-bit halfwords reordered.
fixup_t2_ldst_pcrel_12,
- // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol
- // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
+ // 10-bit PC relative relocation for symbol addresses used in
+ // LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
fixup_arm_pcrel_10_unscaled,
- // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
- // used in VFP instructions where the lower 2 bits are not encoded
- // (so it's encoded as an 8-bit immediate).
+ // 10-bit PC relative relocation for symbol addresses used in VFP instructions
+ // where the lower 2 bits are not encoded (so it's encoded as an 8-bit
+ // immediate).
fixup_arm_pcrel_10,
- // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
- // the short-swapped encoding of Thumb2 instructions.
+ // Equivalent to fixup_arm_pcrel_10, accounting for the short-swapped encoding
+ // of Thumb2 instructions.
fixup_t2_pcrel_10,
- // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses
- // used in VFP instructions where bit 0 not encoded (so it's encoded as an
- // 8-bit immediate).
+ // 9-bit PC relative relocation for symbol addresses used in VFP instructions
+ // where bit 0 is not encoded (so it's encoded as an 8-bit immediate).
fixup_arm_pcrel_9,
- // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for
- // the short-swapped encoding of Thumb2 instructions.
+ // Equivalent to fixup_arm_pcrel_9, accounting for the short-swapped encoding
+ // of Thumb2 instructions.
fixup_t2_pcrel_9,
- // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
- // addresses where the lower 2 bits are not encoded (so it's encoded as an
- // 8-bit immediate).
+ // 10-bit PC relative relocation for symbol addresses where the lower 2 bits
+ // are not encoded (so it's encoded as an 8-bit immediate).
fixup_thumb_adr_pcrel_10,
- // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
- // instruction.
+ // 12-bit PC relative relocation for the ADR instruction.
fixup_arm_adr_pcrel_12,
- // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
- // instruction.
+ // 12-bit PC relative relocation for the ADR instruction.
fixup_t2_adr_pcrel_12,
- // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
- // instructions.
+ // 24-bit PC relative relocation for conditional branch instructions.
fixup_arm_condbranch,
- // fixup_arm_uncondbranch - 24-bit PC relative relocation for
- // branch instructions. (unconditional)
+ // 24-bit PC relative relocation for unconditional branch instructions.
fixup_arm_uncondbranch,
- // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
- // uconditional branch instructions.
+ // 20-bit PC relative relocation for Thumb2 direct conditional branch
+ // instructions.
fixup_t2_condbranch,
- // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
- // branch unconditional branch instructions.
+ // 20-bit PC relative relocation for Thumb2 direct unconditional branch
+ // instructions.
fixup_t2_uncondbranch,
- // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ // 12-bit fixup for Thumb B instructions.
fixup_arm_thumb_br,
// The following fixups handle the ARM BL instructions. These can be
@@ -75,42 +67,41 @@ enum Fixups {
// MachO does not draw a distinction between the two cases, so it will treat
// fixup_arm_uncondbl and fixup_arm_condbl as identical fixups.
- // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions.
+ // Fixup for unconditional ARM BL instructions.
fixup_arm_uncondbl,
- // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial
- // conditionalisation.
+ // Fixup for ARM BL instructions with nontrivial conditionalisation.
fixup_arm_condbl,
- // fixup_arm_blx - Fixup for ARM BLX instructions.
+ // Fixup for ARM BLX instructions.
fixup_arm_blx,
- // fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
+ // Fixup for Thumb BL instructions.
fixup_arm_thumb_bl,
- // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ // Fixup for Thumb BLX instructions.
fixup_arm_thumb_blx,
- // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+ // Fixup for Thumb branch instructions.
fixup_arm_thumb_cb,
- // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ // Fixup for Thumb load/store from constant pool instrs.
fixup_arm_thumb_cp,
- // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ // Fixup for Thumb conditional branching instructions.
fixup_arm_thumb_bcc,
// The next two are for the movt/movw pair
// the 16bit imm field are split into imm{15-12} and imm{11-0}
fixup_arm_movt_hi16, // :upper16:
fixup_arm_movw_lo16, // :lower16:
- fixup_t2_movt_hi16, // :upper16:
- fixup_t2_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
- // fixup_arm_mod_imm - Fixup for mod_imm
+ // Fixup for mod_imm
fixup_arm_mod_imm,
- // fixup_t2_so_imm - Fixup for Thumb2 8-bit rotated operand
+ // Fixup for Thumb2 8-bit rotated operand
fixup_t2_so_imm,
// Marker
@@ -118,6 +109,6 @@ enum Fixups {
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
}
-}
+} // namespace llvm
#endif
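// Editor's note: illustrative sketch, not part of the patch. The movt/movw
// comment above says the 16-bit immediate is split into imm{15-12} and
// imm{11-0}; this shows that split in plain C++ (no LLVM API).
#include <cstdint>
#include <cstdio>

static void splitMovImm(uint16_t Imm, unsigned &Imm4, unsigned &Imm12) {
  Imm4 = (Imm >> 12) & 0xF; // imm{15-12}
  Imm12 = Imm & 0xFFF;      // imm{11-0}
}

int main() {
  unsigned Hi, Lo;
  splitMovImm(0xABCD, Hi, Lo);
  std::printf("imm4=0x%X imm12=0x%X\n", Hi, Lo); // imm4=0xA imm12=0xBCD
}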
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 5c3b45ac2328..d18298385adf 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -230,13 +230,25 @@ void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
namespace llvm {
// Prepare value for the target space for it
-void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t &Value,
+void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
+ const MCValue &Target,
+ uint64_t &Value,
MCContext *Ctx) const {
// The size of the fixup in bits.
uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize;
unsigned Kind = Fixup.getKind();
+ // Parsed LLVM-generated temporary labels are already
+ // adjusted for instruction size, but normal labels aren't.
+ //
+ // To handle both cases, we simply un-adjust the temporary label
+ // case so it acts like all other labels.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ if (A->getSymbol().isTemporary())
+ Value += 2;
+ }
+
switch (Kind) {
default:
llvm_unreachable("unhandled fixup");
@@ -333,9 +345,10 @@ MCObjectWriter *AVRAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
MCELFObjectTargetWriter::getOSABI(OSType));
}
-void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel, MCContext &Ctx) const {
+void AVRAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const {
+ adjustFixupValue(Fixup, Target, Value, &Asm.getContext());
if (Value == 0)
return; // Doesn't change encoding.
@@ -349,7 +362,7 @@ void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
Value <<= Info.TargetOffset;
unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
@@ -436,30 +449,16 @@ bool AVRAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
-void AVRAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) {
+bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
switch ((unsigned) Fixup.getKind()) {
+ default: return false;
// Fixups which should always be recorded as relocations.
case AVR::fixup_7_pcrel:
case AVR::fixup_13_pcrel:
case AVR::fixup_call:
- IsResolved = false;
- break;
- default:
- // Parsed LLVM-generated temporary labels are already
- // adjusted for instruction size, but normal labels aren't.
- //
- // To handle both cases, we simply un-adjust the temporary label
- // case so it acts like all other labels.
- if (Target.getSymA()->getSymbol().isTemporary())
- Value += 2;
-
- adjustFixupValue(Fixup, Value, &Asm.getContext());
- break;
+ return true;
}
}
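// Editor's note: illustrative sketch, not part of the patch. Per the comment
// moved into adjustFixupValue above, values computed from LLVM-generated
// temporary labels arrive already biased by the instruction size, so the AVR
// backend adds 2 to put them on the same footing as ordinary labels before
// encoding. Stand-alone model, not the MC API.
#include <cstdio>

static long normalizeAVRFixup(long Value, bool TargetIsTemporaryLabel) {
  if (TargetIsTemporaryLabel)
    Value += 2; // un-adjust the temporary-label case
  return Value;
}

int main() {
  std::printf("%ld %ld\n", normalizeAVRFixup(-6, true),
              normalizeAVRFixup(-6, false)); // -4 -6
}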
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index f2be2494684a..4a75e3b0d22d 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -35,13 +35,14 @@ public:
AVRAsmBackend(Triple::OSType OSType)
: MCAsmBackend(), OSType(OSType) {}
- void adjustFixupValue(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) const;
+ void adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
+ uint64_t &Value, MCContext *Ctx = nullptr) const;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
@@ -63,10 +64,8 @@ public:
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
private:
Triple::OSType OSType;
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index c6ddd6bdad5e..f48429ee57b0 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -16,6 +16,7 @@
#include "BPFRegisterInfo.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -57,6 +58,11 @@ private:
bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
+ // Node preprocessing cases
+ void PreprocessLoad(SDNode *Node, SelectionDAG::allnodes_iterator I);
+ void PreprocessCopyToReg(SDNode *Node);
+ void PreprocessTrunc(SDNode *Node, SelectionDAG::allnodes_iterator I);
+
// Find constants from a constant structure
typedef std::vector<unsigned char> val_vec_type;
bool fillGenericConstant(const DataLayout &DL, const Constant *CV,
@@ -69,9 +75,12 @@ private:
val_vec_type &Vals, int Offset);
bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset,
uint64_t Size, unsigned char *ByteSeq);
+ bool checkLoadDef(unsigned DefReg, unsigned match_load_op);
// Mapping from ConstantStruct global value to corresponding byte-list values
std::map<const void *, val_vec_type> cs_vals_;
+ // Mapping from vreg to load memory opcode
+ std::map<unsigned, unsigned> load_to_vreg_;
};
} // namespace
@@ -203,89 +212,110 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
SelectCode(Node);
}
+void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
+ SelectionDAG::allnodes_iterator I) {
+ union {
+ uint8_t c[8];
+ uint16_t s;
+ uint32_t i;
+ uint64_t d;
+ } new_val; // holds the constant value that replaces the load.
+ bool to_replace = false;
+ SDLoc DL(Node);
+ const LoadSDNode *LD = cast<LoadSDNode>(Node);
+ uint64_t size = LD->getMemOperand()->getSize();
+
+ if (!size || size > 8 || (size & (size - 1)))
+ return;
+
+ SDNode *LDAddrNode = LD->getOperand(1).getNode();
+ // Match LDAddr against either global_addr or (global_addr + offset)
+ unsigned opcode = LDAddrNode->getOpcode();
+ if (opcode == ISD::ADD) {
+ SDValue OP1 = LDAddrNode->getOperand(0);
+ SDValue OP2 = LDAddrNode->getOperand(1);
+
+ // We want to find the pattern global_addr + offset
+ SDNode *OP1N = OP1.getNode();
+ if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END || OP1N->getNumOperands() == 0)
+ return;
+
+ DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
+
+ const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode());
+ const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode());
+ if (GADN && CDN)
+ to_replace =
+ getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c);
+ } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END &&
+ LDAddrNode->getNumOperands() > 0) {
+ DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
+
+ SDValue OP1 = LDAddrNode->getOperand(0);
+ if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(OP1.getNode()))
+ to_replace = getConstantFieldValue(GADN, 0, size, new_val.c);
+ }
+
+ if (!to_replace)
+ return;
+
+ // replacing the old with a new value
+ uint64_t val;
+ if (size == 1)
+ val = new_val.c[0];
+ else if (size == 2)
+ val = new_val.s;
+ else if (size == 4)
+ val = new_val.i;
+ else {
+ val = new_val.d;
+ }
+
+ DEBUG(dbgs() << "Replacing load of size " << size << " with constant " << val
+ << '\n');
+ SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
+
+ // After replacement, the current node is dead; step the iterator back
+ // one node so it remains valid.
+ I--;
+ SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)};
+ SDValue To[] = {NVal, NVal};
+ CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
+ I++;
+ // It is safe to delete node now
+ CurDAG->DeleteNode(Node);
+}
+
void BPFDAGToDAGISel::PreprocessISelDAG() {
- // Iterate through all nodes, only interested in loads from ConstantStruct
- // ConstantArray should have converted by IR->DAG processing
+ // Iterate through all nodes, interested in the following cases:
+ //
+ // . loads from ConstantStruct or ConstantArray constructs, which can be
+ // turned into constants themselves; with this we can avoid reading from
+ // the read-only section at runtime.
+ //
+ // . reg truncation is often the result of an 8/16/32bit->64bit or
+ // 8/16bit->32bit conversion. If the reg value is loaded with the masked
+ // byte width, the AND operation can be removed since a BPF LOAD already
+ // zero-extends.
+ //
+ // This also fixes a correctness issue.
+ // In BPF socket-related programs, e.g., __sk_buff->{data, data_end}
+ // are 32-bit registers, but later on the kernel verifier will rewrite
+ // them with 64-bit values. Therefore, truncating the value after the
+ // load would result in incorrect code.
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end();
I != E;) {
SDNode *Node = &*I++;
unsigned Opcode = Node->getOpcode();
- if (Opcode != ISD::LOAD)
- continue;
-
- union {
- uint8_t c[8];
- uint16_t s;
- uint32_t i;
- uint64_t d;
- } new_val; // hold up the constant values replacing loads.
- bool to_replace = false;
- SDLoc DL(Node);
- const LoadSDNode *LD = cast<LoadSDNode>(Node);
- uint64_t size = LD->getMemOperand()->getSize();
- if (!size || size > 8 || (size & (size - 1)))
- continue;
-
- SDNode *LDAddrNode = LD->getOperand(1).getNode();
- // Match LDAddr against either global_addr or (global_addr + offset)
- unsigned opcode = LDAddrNode->getOpcode();
- if (opcode == ISD::ADD) {
- SDValue OP1 = LDAddrNode->getOperand(0);
- SDValue OP2 = LDAddrNode->getOperand(1);
-
- // We want to find the pattern global_addr + offset
- SDNode *OP1N = OP1.getNode();
- if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END ||
- OP1N->getNumOperands() == 0)
- continue;
-
- DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
-
- const GlobalAddressSDNode *GADN =
- dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode());
- const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode());
- if (GADN && CDN)
- to_replace =
- getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c);
- } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END &&
- LDAddrNode->getNumOperands() > 0) {
- DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
-
- SDValue OP1 = LDAddrNode->getOperand(0);
- if (const GlobalAddressSDNode *GADN =
- dyn_cast<GlobalAddressSDNode>(OP1.getNode()))
- to_replace = getConstantFieldValue(GADN, 0, size, new_val.c);
- }
-
- if (!to_replace)
- continue;
-
- // replacing the old with a new value
- uint64_t val;
- if (size == 1)
- val = new_val.c[0];
- else if (size == 2)
- val = new_val.s;
- else if (size == 4)
- val = new_val.i;
- else {
- val = new_val.d;
- }
-
- DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
- << val << '\n');
- SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
-
- // After replacement, the current node is dead, we need to
- // go backward one step to make iterator still work
- I--;
- SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)};
- SDValue To[] = {NVal, NVal};
- CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
- I++;
- // It is safe to delete node now
- CurDAG->DeleteNode(Node);
+ if (Opcode == ISD::LOAD)
+ PreprocessLoad(Node, I);
+ else if (Opcode == ISD::CopyToReg)
+ PreprocessCopyToReg(Node);
+ else if (Opcode == ISD::AND)
+ PreprocessTrunc(Node, I);
}
}
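// Editor's note: illustrative sketch, not part of the patch. It demonstrates
// the claim in the comment block above: a BPF LDB/LDH/LDW already
// zero-extends into the 64-bit register, so an AND with a mask of the same
// width (0xFF/0xFFFF/0xFFFFFFFF) cannot change the value. Plain C++, no
// SelectionDAG types.
#include <cassert>
#include <cstdint>

static uint64_t zextLoad8(uint8_t V) { return V; } // models LDB

int main() {
  for (unsigned V = 0; V <= 0xFF; ++V) {
    uint64_t Loaded = zextLoad8(static_cast<uint8_t>(V));
    assert((Loaded & 0xFF) == Loaded); // the AND is a no-op
  }
  return 0;
}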
@@ -415,6 +445,134 @@ bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL,
return true;
}
+void BPFDAGToDAGISel::PreprocessCopyToReg(SDNode *Node) {
+ const RegisterSDNode *RegN = dyn_cast<RegisterSDNode>(Node->getOperand(1));
+ if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg()))
+ return;
+
+ const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node->getOperand(2));
+ if (!LD)
+ return;
+
+ // Assign a load value to a virtual register and record its load width.
+ unsigned mem_load_op = 0;
+ switch (LD->getMemOperand()->getSize()) {
+ default:
+ return;
+ case 4:
+ mem_load_op = BPF::LDW;
+ break;
+ case 2:
+ mem_load_op = BPF::LDH;
+ break;
+ case 1:
+ mem_load_op = BPF::LDB;
+ break;
+ }
+
+ DEBUG(dbgs() << "Find Load Value to VReg "
+ << TargetRegisterInfo::virtReg2Index(RegN->getReg()) << '\n');
+ load_to_vreg_[RegN->getReg()] = mem_load_op;
+}
+
+void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
+ SelectionDAG::allnodes_iterator I) {
+ ConstantSDNode *MaskN = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!MaskN)
+ return;
+
+ unsigned match_load_op = 0;
+ switch (MaskN->getZExtValue()) {
+ default:
+ return;
+ case 0xFFFFFFFF:
+ match_load_op = BPF::LDW;
+ break;
+ case 0xFFFF:
+ match_load_op = BPF::LDH;
+ break;
+ case 0xFF:
+ match_load_op = BPF::LDB;
+ break;
+ }
+
+ // The Reg operand should be a virtual register, which is defined
+ // outside the current basic block. The DAG combiner has already done a
+ // good job of removing truncations inside a single basic block.
+ SDValue BaseV = Node->getOperand(0);
+ if (BaseV.getOpcode() != ISD::CopyFromReg)
+ return;
+
+ const RegisterSDNode *RegN =
+ dyn_cast<RegisterSDNode>(BaseV.getNode()->getOperand(1));
+ if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg()))
+ return;
+ unsigned AndOpReg = RegN->getReg();
+ DEBUG(dbgs() << "Examine %vreg" << TargetRegisterInfo::virtReg2Index(AndOpReg)
+ << '\n');
+
+ // Examine the PHI insns in the MachineBasicBlock to find out the
+ // definitions of this virtual register. At this stage (DAG2DAG
+ // transformation), only PHI machine insns are available in the machine basic
+ // block.
+ MachineBasicBlock *MBB = FuncInfo->MBB;
+ MachineInstr *MII = nullptr;
+ for (auto &MI : *MBB) {
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ const MachineOperand &MOP = MI.getOperand(i);
+ if (!MOP.isReg() || !MOP.isDef())
+ continue;
+ unsigned Reg = MOP.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) && Reg == AndOpReg) {
+ MII = &MI;
+ break;
+ }
+ }
+ }
+
+ if (MII == nullptr) {
+ // No phi definition in this block.
+ if (!checkLoadDef(AndOpReg, match_load_op))
+ return;
+ } else {
+ // The PHI node looks like:
+ // %vreg2<def> = PHI %vreg0, <BB#1>, %vreg1, <BB#3>
+ // Trace each incoming definition, e.g., (%vreg0, BB#1) and (%vreg1, BB#3)
+ // The AND operation can be removed if both %vreg0 in BB#1 and %vreg1 in
+ // BB#3 are defined with a load matching the MaskN.
+ DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n');
+ unsigned PrevReg = -1;
+ for (unsigned i = 0; i < MII->getNumOperands(); ++i) {
+ const MachineOperand &MOP = MII->getOperand(i);
+ if (MOP.isReg()) {
+ if (MOP.isDef())
+ continue;
+ PrevReg = MOP.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(PrevReg))
+ return;
+ if (!checkLoadDef(PrevReg, match_load_op))
+ return;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump();
+ dbgs() << '\n');
+
+ I--;
+ CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV);
+ I++;
+ CurDAG->DeleteNode(Node);
+}
+
+bool BPFDAGToDAGISel::checkLoadDef(unsigned DefReg, unsigned match_load_op) {
+ auto it = load_to_vreg_.find(DefReg);
+ if (it == load_to_vreg_.end())
+ return false; // The definition of register is not exported yet.
+
+ return it->second == match_load_op;
+}
+
FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) {
return new BPFDAGToDAGISel(TM);
}
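// Editor's note: illustrative sketch, not part of the patch. PreprocessTrunc
// above only removes the AND when every value reaching it (directly or via a
// PHI) was recorded by PreprocessCopyToReg as a load of the matching width;
// this is a stand-alone model of that check with stand-in LDB/LDH/LDW values.
#include <cstdio>
#include <map>
#include <vector>

enum LoadOp { LDB = 1, LDH = 2, LDW = 4 };

static bool allInputsMatch(const std::map<unsigned, LoadOp> &LoadForVReg,
                           const std::vector<unsigned> &PhiInputs,
                           LoadOp Wanted) {
  for (unsigned R : PhiInputs) {
    auto It = LoadForVReg.find(R);
    if (It == LoadForVReg.end() || It->second != Wanted)
      return false; // unknown definition or wrong width: keep the AND
  }
  return true; // every input is a matching zero-extending load
}

int main() {
  std::map<unsigned, LoadOp> M = {{1, LDB}, {2, LDB}, {3, LDW}};
  std::printf("%d %d\n", allInputsMatch(M, {1, 2}, LDB),
              allInputsMatch(M, {1, 3}, LDB)); // 1 0
}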
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 2b0ceaa66258..97a53dcbaed7 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -178,8 +178,8 @@ static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
cl::init(false), cl::ZeroOrMore);
-static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
- cl::Hidden, cl::desc("Use allocframe more conservatively"));
+static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
+ cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
cl::init(true), cl::desc("Optimize spill slots"));
@@ -550,7 +550,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
- DebugLoc dl;
unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
@@ -584,77 +583,56 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
MI->eraseFromParent();
}
- if (!hasFP(MF))
- return;
-
- // Check for overflow.
- // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
- const unsigned int ALLOCFRAME_MAX = 16384;
+ DebugLoc dl = MBB.findDebugLoc(InsertPt);
- // Create a dummy memory operand to avoid allocframe from being treated as
- // a volatile memory reference.
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
- 4, 4);
-
- if (NumBytes >= ALLOCFRAME_MAX) {
- // Emit allocframe(#0).
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
- .addImm(0)
- .addMemOperand(MMO);
-
- // Subtract offset from frame pointer.
- // We use a caller-saved non-parameter register for that.
- unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32),
- CallerSavedReg).addImm(NumBytes);
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
+ if (hasFP(MF)) {
+ insertAllocframe(MBB, InsertPt, NumBytes);
+ if (AlignStack) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
+ .addReg(SP)
+ .addImm(-int64_t(MaxAlign));
+ }
+ // If the stack-checking is enabled, and we spilled the callee-saved
+ // registers inline (i.e. did not use a spill function), then call
+ // the stack checker directly.
+ if (EnableStackOVFSanitizer && !PrologueStubs)
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
+ .addExternalSymbol("__runtime_stack_check");
+ } else if (NumBytes > 0) {
+ assert(alignTo(NumBytes, 8) == NumBytes);
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
.addReg(SP)
- .addReg(CallerSavedReg);
- } else {
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
- .addImm(NumBytes)
- .addMemOperand(MMO);
+ .addImm(-int(NumBytes));
}
-
- if (AlignStack) {
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
- .addReg(SP)
- .addImm(-int64_t(MaxAlign));
- }
-
- // If the stack-checking is enabled, and we spilled the callee-saved
- // registers inline (i.e. did not use a spill function), then call
- // the stack checker directly.
- if (EnableStackOVFSanitizer && !PrologueStubs)
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
- .addExternalSymbol("__runtime_stack_check");
}
void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
- if (!hasFP(MF))
- return;
-
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
unsigned SP = HRI.getStackRegister();
+ MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
+ DebugLoc dl = MBB.findDebugLoc(InsertPt);
+
+ if (!hasFP(MF)) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (unsigned NumBytes = MFI.getStackSize()) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(NumBytes);
+ }
+ return;
+ }
+
MachineInstr *RetI = getReturn(MBB);
unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
- MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
- DebugLoc DL;
- if (InsertPt != MBB.end())
- DL = InsertPt->getDebugLoc();
- else if (!MBB.empty())
- DL = std::prev(MBB.end())->getDebugLoc();
-
// Handle EH_RETURN.
if (RetOpc == Hexagon::EH_RETURN_JMPR) {
- BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
- BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP)
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
.addReg(SP)
.addReg(Hexagon::R28);
return;
@@ -699,16 +677,52 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
// otherwise just add deallocframe. The function could be returning via a
// tail call.
if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
- BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
return;
}
unsigned NewOpc = Hexagon::L4_return;
- MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc));
+ MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc));
// Transfer the function live-out registers.
NewI->copyImplicitOps(MF, *RetI);
MBB.erase(RetI);
}
+void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
+ MachineFunction &MF = *MBB.getParent();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const unsigned int ALLOCFRAME_MAX = 16384;
+
+ // Create a dummy memory operand to prevent allocframe from being treated as
+ // a volatile memory reference.
+ auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
+ MachineMemOperand::MOStore, 4, 4);
+
+ DebugLoc dl = MBB.findDebugLoc(InsertPt);
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(0)
+ .addMemOperand(MMO);
+
+ // Subtract the size from the stack pointer.
+ unsigned SP = HRI.getStackRegister();
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(-int(NumBytes));
+ } else {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(NumBytes)
+ .addMemOperand(MMO);
+ }
+}
+
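// Editor's note: illustrative sketch, not part of the patch. allocframe's
// immediate is capped (ALLOCFRAME_MAX above), so larger frames become
// allocframe(#0) followed by an explicit SP adjustment. Plain C++ model of
// that decision, not the MachineInstr builder code.
#include <cstdio>

struct FrameSeq { unsigned AllocframeImm; long SpAdjust; };

static FrameSeq planAllocframe(unsigned NumBytes) {
  const unsigned ALLOCFRAME_MAX = 16384;
  if (NumBytes >= ALLOCFRAME_MAX)
    return {0, -static_cast<long>(NumBytes)}; // allocframe(#0); then adjust SP
  return {NumBytes, 0};                       // allocframe(#NumBytes)
}

int main() {
  FrameSeq Small = planAllocframe(256), Big = planAllocframe(65536);
  std::printf("small: #%u adj %ld\n", Small.AllocframeImm, Small.SpAdjust);
  std::printf("big:   #%u adj %ld\n", Big.AllocframeImm, Big.SpAdjust);
}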
void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
MachineBasicBlock &SaveB) const {
SetVector<unsigned> Worklist;
@@ -928,12 +942,11 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
}
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+ return false;
+
auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
-
- bool HasFixed = MFI.getNumFixedObjects();
- bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
- .getLocalFrameObjectCount();
bool HasExtraAlign = HRI.needsStackRealignment(MF);
bool HasAlloca = MFI.hasVarSizedObjects();
@@ -947,18 +960,35 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
// By default we want to use SP (since it's always there). FP requires
// some setup (i.e. ALLOCFRAME).
- // Fixed and preallocated objects need FP if the distance from them to
- // the SP is unknown (as is with alloca or aligna).
- if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign))
+ // Both alloca and stack alignment modify the stack pointer by an
+ // undetermined value, so we need to save it at the entry to the function
+ // (i.e. use allocframe).
+ if (HasAlloca || HasExtraAlign)
return true;
if (MFI.getStackSize() > 0) {
- if (EnableStackOVFSanitizer || UseAllocframe)
+ // If FP-elimination is disabled, we have to use FP at this point.
+ const TargetMachine &TM = MF.getTarget();
+ if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer)
+ return true;
+ if (EnableStackOVFSanitizer)
return true;
}
- if (MFI.hasCalls() ||
- MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR())
+ const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ if (MFI.hasCalls() || HMFI.hasClobberLR())
+ return true;
+
+ // Frame pointer elimination is a possibility at this point, but
+ // to know if FP is necessary we need to know if spill/restore
+ // functions will be used (they require FP to be valid).
+ // This means that hasFP shouldn't really be called before CSI is
+ // calculated, and some measures are taken to make sure of that
+ // (e.g. default implementations of virtual functions that call it
+ // are overridden appropriately).
+ assert(MFI.isCalleeSavedInfoValid() && "Need to know CSI");
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (useSpillFunction(MF, CSI) || useRestoreFunction(MF, CSI))
return true;
return false;
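// Editor's note: illustrative sketch, not part of the patch. It restates the
// order of checks in the rewritten hasFP() above as a plain predicate; the
// field names are descriptive stand-ins for the MachineFunction and
// MachineFrameInfo queries used in the hunk.
#include <cstdio>

struct FrameFacts {
  bool Naked, HasAlloca, NeedsRealign, FPElimDisabled, StackOVFSan;
  bool HasCalls, ClobbersLR, UsesSpillOrRestoreFn;
  unsigned StackSize;
};

static bool hasFPModel(const FrameFacts &F) {
  if (F.Naked)
    return false;                    // naked functions never set up FP
  if (F.HasAlloca || F.NeedsRealign)
    return true;                     // SP moves by an unknown amount
  if (F.StackSize > 0 && (F.FPElimDisabled || F.StackOVFSan))
    return true;
  if (F.HasCalls || F.ClobbersLR)
    return true;
  return F.UsesSpillOrRestoreFn;     // spill/restore stubs require a valid FP
}

int main() {
  FrameFacts Leaf{false, false, false, false, false, false, false, false, 0};
  std::printf("%d\n", hasFPModel(Leaf)); // 0
}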
@@ -1051,9 +1081,10 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
bool HasExtraAlign = HRI.needsStackRealignment(MF);
bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
- unsigned FrameSize = MFI.getStackSize();
- unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ unsigned FrameSize = MFI.getStackSize();
+ unsigned SP = HRI.getStackRegister();
+ unsigned FP = HRI.getFrameRegister();
unsigned AP = HMFI.getStackAlignBasePhysReg();
// It may happen that AP will be absent even HasAlloca && HasExtraAlign
// is true. HasExtraAlign may be set because of vector spills, without
@@ -1135,7 +1166,7 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// there will be no SP -= FrameSize), so the frame size should not be
// added to the calculated offset.
int RealOffset = Offset;
- if (!UseFP && !UseAP && HasFP)
+ if (!UseFP && !UseAP)
RealOffset = FrameSize+Offset;
return RealOffset;
}
@@ -2402,7 +2433,7 @@ void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
/// be generated via inline code. If this function returns "true", inline
/// code will be generated. If this function returns "false", additional
/// checks are performed, which may still lead to the inline code.
-bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
+bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
const CSIVect &CSI) const {
if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
return true;
@@ -2432,7 +2463,7 @@ bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
return false;
}
-bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
+bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
return false;
@@ -2445,7 +2476,7 @@ bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
return Threshold < NumCSI;
}
-bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
+bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
return false;
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 529a61d4a5b5..f4d4e1b61a26 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -48,6 +48,15 @@ public:
return true;
}
+ bool hasReservedCallFrame(const MachineFunction &MF) const override {
+ // We always reserve call frame as a part of the initial stack allocation.
+ return true;
+ }
+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override {
+ // Override this function to avoid calling hasFP before CSI is set
+ // (the default implementation calls hasFP).
+ return true;
+ }
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
@@ -94,6 +103,8 @@ private:
unsigned SP, unsigned CF) const;
void insertPrologueInBlock(MachineBasicBlock &MBB, bool PrologueStubs) const;
void insertEpilogueInBlock(MachineBasicBlock &MBB) const;
+ void insertAllocframe(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const;
bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
const HexagonRegisterInfo &HRI, bool &PrologueStubs) const;
bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
@@ -148,9 +159,9 @@ private:
void addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, const CSIVect &CSI,
bool IsDef, bool IsKill) const;
- bool shouldInlineCSR(MachineFunction &MF, const CSIVect &CSI) const;
- bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const;
- bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
+ bool shouldInlineCSR(const MachineFunction &MF, const CSIVect &CSI) const;
+ bool useSpillFunction(const MachineFunction &MF, const CSIVect &CSI) const;
+ bool useRestoreFunction(const MachineFunction &MF, const CSIVect &CSI) const;
bool mayOverflowFrameOffset(MachineFunction &MF) const;
};
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index afed894cfb9a..2daacf795555 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1002,51 +1002,46 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
- SDNode *Node = Op.getNode();
MachineFunction &MF = DAG.getMachineFunction();
- auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
- switch (Node->getOpcode()) {
- case ISD::INLINEASM: {
- unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
- --NumOps; // Ignore the flag operand.
-
- for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- if (FuncInfo.hasClobberLR())
- break;
- unsigned Flags =
- cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
- ++i; // Skip the ID value.
-
- switch (InlineAsm::getKind(Flags)) {
- default: llvm_unreachable("Bad flags!");
- case InlineAsm::Kind_RegDef:
- case InlineAsm::Kind_RegUse:
- case InlineAsm::Kind_Imm:
- case InlineAsm::Kind_Clobber:
- case InlineAsm::Kind_Mem: {
- for (; NumVals; --NumVals, ++i) {}
- break;
- }
- case InlineAsm::Kind_RegDefEarlyClobber: {
- for (; NumVals; --NumVals, ++i) {
- unsigned Reg =
- cast<RegisterSDNode>(Node->getOperand(i))->getReg();
-
- // Check it to be lr
- const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo();
- if (Reg == QRI->getRARegister()) {
- FuncInfo.setHasClobberLR(true);
- break;
- }
- }
- break;
- }
+ auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
+ unsigned LR = HRI.getRARegister();
+
+ if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR())
+ return Op;
+
+ unsigned NumOps = Op.getNumOperands();
+ if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default:
+ llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Mem:
+ i += NumVals;
+ break;
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
+ if (Reg != LR)
+ continue;
+ HMFI.setHasClobberLR(true);
+ return Op;
}
+ break;
}
}
- } // Node->getOpcode
+ }
+
return Op;
}
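// Editor's note: illustrative sketch, not part of the patch. The rewritten
// loop above walks inline-asm operand groups (a flags word followed by
// NumVals operands) and, for register-defining/clobbering groups, checks
// whether LR is among the registers. Stand-alone model with stand-in kinds.
#include <cstdio>
#include <vector>

enum Kind { RegUse, RegDef, Clobber };
struct Group { Kind K; std::vector<unsigned> Regs; };

static bool clobbersReg(const std::vector<Group> &Groups, unsigned LR) {
  for (const Group &G : Groups) {
    if (G.K == RegUse)
      continue;              // uses are skipped; only defs/clobbers matter
    for (unsigned R : G.Regs)
      if (R == LR)
        return true;         // would set HMFI.setHasClobberLR(true)
  }
  return false;
}

int main() {
  std::printf("%d %d\n", clobbersReg({{RegUse, {31}}, {Clobber, {31}}}, 31),
              clobbersReg({{RegUse, {31}}, {RegDef, {5}}}, 31)); // 1 0
}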
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index fec2dc5ce306..1eac2d3dd8e2 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1253,10 +1253,16 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
LivePhysRegs LiveAtMI(HRI);
getLiveRegsAt(LiveAtMI, MI);
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
+ unsigned PReg = Op1.getReg();
+ assert(Op1.getSubReg() == 0);
+ unsigned PState = getRegState(Op1);
+
if (Op0.getReg() != Op2.getReg()) {
+ unsigned S = Op0.getReg() != Op3.getReg() ? PState & ~RegState::Kill
+ : PState;
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov))
.add(Op0)
- .add(Op1)
+ .addReg(PReg, S)
.add(Op2);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
@@ -1265,7 +1271,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (Op0.getReg() != Op3.getReg()) {
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov))
.add(Op0)
- .add(Op1)
+ .addReg(PReg, PState)
.add(Op3);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
@@ -1282,12 +1288,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
LivePhysRegs LiveAtMI(HRI);
getLiveRegsAt(LiveAtMI, MI);
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
+ unsigned PReg = Op1.getReg();
+ assert(Op1.getSubReg() == 0);
+ unsigned PState = getRegState(Op1);
if (Op0.getReg() != Op2.getReg()) {
+ unsigned S = Op0.getReg() != Op3.getReg() ? PState & ~RegState::Kill
+ : PState;
unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo);
unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
.add(Op0)
+ .addReg(PReg, S)
.add(Op1)
.addReg(SrcHi)
.addReg(SrcLo);
@@ -1300,7 +1312,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine))
.add(Op0)
- .add(Op1)
+ .addReg(PReg, PState)
.addReg(SrcHi)
.addReg(SrcLo);
if (IsDestLive)
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index de6b203015d8..e93f075f4ccd 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -69,9 +69,7 @@ namespace {
public:
static char ID;
- HexagonNewValueJump() : MachineFunctionPass(ID) {
- initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry());
- }
+ HexagonNewValueJump() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
@@ -445,8 +443,6 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
unsigned predReg = 0; // predicate reg of the jump.
unsigned cmpReg1 = 0;
int cmpOp2 = 0;
- bool MO1IsKill = false;
- bool MO2IsKill = false;
MachineBasicBlock::iterator jmpPos;
MachineBasicBlock::iterator cmpPos;
MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr;
@@ -548,14 +544,10 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
// We need cmpReg1 and cmpOp2(imm or reg) while building
// new value jump instruction.
cmpReg1 = MI.getOperand(1).getReg();
- if (MI.getOperand(1).isKill())
- MO1IsKill = true;
- if (isSecondOpReg) {
+ if (isSecondOpReg)
cmpOp2 = MI.getOperand(2).getReg();
- if (MI.getOperand(2).isKill())
- MO2IsKill = true;
- } else
+ else
cmpOp2 = MI.getOperand(2).getImm();
continue;
}
@@ -605,11 +597,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
if ((COp == Hexagon::C2_cmpeq || COp == Hexagon::C4_cmpneq) &&
(feederReg == (unsigned) cmpOp2)) {
unsigned tmp = cmpReg1;
- bool tmpIsKill = MO1IsKill;
cmpReg1 = cmpOp2;
- MO1IsKill = MO2IsKill;
cmpOp2 = tmp;
- MO2IsKill = tmpIsKill;
}
// Now we have swapped the operands, all we need to check is,
@@ -623,31 +612,33 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
// make sure we are respecting the kill values of
// the operands of the feeder.
- bool updatedIsKill = false;
- for (unsigned i = 0; i < MI.getNumOperands(); i++) {
- MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isUse()) {
- unsigned feederReg = MO.getReg();
- for (MachineBasicBlock::iterator localII = feederPos,
- end = cmpInstr->getIterator(); localII != end; localII++) {
- MachineInstr &localMI = *localII;
- for (unsigned j = 0; j < localMI.getNumOperands(); j++) {
- MachineOperand &localMO = localMI.getOperand(j);
- if (localMO.isReg() && localMO.isUse() &&
- localMO.isKill() && feederReg == localMO.getReg()) {
- // We found that there is kill of a use register
- // Set up a kill flag on the register
- localMO.setIsKill(false);
- MO.setIsKill();
- updatedIsKill = true;
- break;
- }
+ auto TransferKills = [jmpPos,cmpPos] (MachineInstr &MI) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned UseR = MO.getReg();
+ for (auto I = std::next(MI.getIterator()); I != jmpPos; ++I) {
+ if (I == cmpPos)
+ continue;
+ for (MachineOperand &Op : I->operands()) {
+ if (!Op.isReg() || !Op.isUse() || !Op.isKill())
+ continue;
+ if (Op.getReg() != UseR)
+ continue;
+ // We found that there is kill of a use register
+ // Set up a kill flag on the register
+ Op.setIsKill(false);
+ MO.setIsKill(true);
+ return;
}
- if (updatedIsKill) break;
}
}
- if (updatedIsKill) break;
- }
+ };
+
+ TransferKills(*feederPos);
+ TransferKills(*cmpPos);
+ bool MO1IsKill = cmpPos->killsRegister(cmpReg1, QRI);
+ bool MO2IsKill = isSecondOpReg && cmpPos->killsRegister(cmpOp2, QRI);
MBB->splice(jmpPos, MI.getParent(), MI);
MBB->splice(jmpPos, MI.getParent(), cmpInstr);
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 27b40f134b1f..a331c978f59d 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -535,9 +535,9 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
!MI->getOperand(1).isGlobal())
continue;
- DEBUG(dbgs() << "[Analyzing A2_tfrsi]: " << *MI << "\n");
- DEBUG(dbgs() << "\t[InstrNode]: " << Print<NodeAddr<InstrNode *>>(IA, *DFG)
- << "\n");
+ DEBUG(dbgs() << "[Analyzing " << HII->getName(MI->getOpcode()) << "]: "
+ << *MI << "\n\t[InstrNode]: "
+ << Print<NodeAddr<InstrNode *>>(IA, *DFG) << '\n');
NodeList UNodeList;
getAllRealUses(SA, UNodeList);
@@ -605,7 +605,9 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
const TargetOperandInfo TOI(*HII);
DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, TOI);
- G.build();
+ // Need to keep dead phis because we can propagate uses of registers into
+ // nodes dominated by those would-be phis.
+ G.build(BuildOptions::KeepDeadPhis);
DFG = &G;
Liveness L(MRI, *DFG);
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 031a1bdefafb..76d9b31b005f 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -113,6 +113,7 @@ namespace llvm {
void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
void initializeHexagonGenMuxPass(PassRegistry&);
void initializeHexagonOptAddrModePass(PassRegistry&);
+ void initializeHexagonNewValueJumpPass(PassRegistry&);
Pass *createHexagonLoopIdiomPass();
FunctionPass *createHexagonBitSimplify();
@@ -158,6 +159,7 @@ extern "C" void LLVMInitializeHexagonTarget() {
initializeHexagonLoopIdiomRecognizePass(PR);
initializeHexagonGenMuxPass(PR);
initializeHexagonOptAddrModePass(PR);
+ initializeHexagonNewValueJumpPass(PR);
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 4dacb1501392..34df2ebcc520 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -49,6 +49,10 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement",
cl::Hidden, cl::init(false),
cl::desc("Trace global value placement"));
+static cl::opt<bool>
+ EmitJtInText("hexagon-emit-jt-text", cl::Hidden, cl::init(false),
+ cl::desc("Emit hexagon jump tables in function section"));
+
// TraceGVPlacement controls messages for all builds. For builds with assertions
// (debug or release), messages are also controlled by the usual debug flags
// (e.g. -debug and -debug-only=globallayout)
@@ -256,6 +260,11 @@ unsigned HexagonTargetObjectFile::getSmallDataSize() const {
return SmallDataThreshold;
}
+bool HexagonTargetObjectFile::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ return EmitJtInText;
+}
+
/// Descends any type down to "elementary" components,
/// discovering the smallest addressable one.
/// If zero is returned, declaration will not be modified.
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
index 58dff2b95e19..373d850b53be 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -33,6 +33,9 @@ namespace llvm {
unsigned getSmallDataSize() const;
+ bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
+ const Function &F) const override;
+
private:
MCSectionELF *SmallDataSection;
MCSectionELF *SmallBSSSection;
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index d578bfab3658..aac810e29fe9 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -21,6 +21,10 @@ using namespace llvm;
#define DEBUG_TYPE "hexagontti"
+static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
+ cl::init(true), cl::Hidden,
+ cl::desc("Control lookup table emission on Hexagon target"));
+
TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
// Return Fast Hardware support as every input < 64 bits will be promoted
@@ -29,7 +33,7 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
}
// The Hexagon target can unroll loops with run-time trip counts.
-void HexagonTTIImpl::getUnrollingPreferences(Loop *L,
+void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Runtime = UP.Partial = true;
}
@@ -46,8 +50,9 @@ unsigned HexagonTTIImpl::getCacheLineSize() const {
return getST()->getL1CacheLineSize();
}
-int HexagonTTIImpl::getUserCost(const User *U) {
- auto isCastFoldedIntoLoad = [] (const CastInst *CI) -> bool {
+int HexagonTTIImpl::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands) {
+ auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool {
if (!CI->isIntegerCast())
return false;
const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
@@ -67,5 +72,9 @@ int HexagonTTIImpl::getUserCost(const User *U) {
if (const CastInst *CI = dyn_cast<const CastInst>(U))
if (isCastFoldedIntoLoad(CI))
return TargetTransformInfo::TCC_Free;
- return BaseT::getUserCost(U);
+ return BaseT::getUserCost(U, Operands);
+}
+
+bool HexagonTTIImpl::shouldBuildLookupTables() const {
+ return EmitLookupTables;
}
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 8414bfc4e197..ab5a6e07d873 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -46,7 +46,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
// The Hexagon target can unroll loops with run-time trip counts.
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
// L1 cache prefetch.
unsigned getPrefetchDistance() const;
@@ -61,7 +62,10 @@ public:
/// @}
- int getUserCost(const User *U);
+ int getUserCost(const User *U, ArrayRef<const Value *> Operands);
+
+ // Hexagon specific decision to generate a lookup table.
+ bool shouldBuildLookupTables() const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 093ce80bc2e3..34d0b55aa22a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -199,11 +199,8 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- /// processFixupValue - Target hook to adjust the literal value of a fixup
- /// if necessary. IsResolved signals whether the caller believes a relocation
- /// is needed; the target can modify the value. The default does nothing.
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override {
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override {
MCFixupKind Kind = Fixup.getKind();
switch((unsigned)Kind) {
@@ -299,8 +296,7 @@ public:
case fixup_Hexagon_LD_PLT_B22_PCREL_X:
case fixup_Hexagon_LD_PLT_B32_PCREL_X:
// These relocations should always have a relocation recorded
- IsResolved = false;
- return;
+ return true;
case fixup_Hexagon_B22_PCREL:
//IsResolved = false;
@@ -317,7 +313,7 @@ public:
case fixup_Hexagon_B7_PCREL:
case fixup_Hexagon_B7_PCREL_X:
if (DisableFixup)
- IsResolved = false;
+ return true;
break;
case FK_Data_1:
@@ -326,8 +322,9 @@ public:
case FK_PCRel_4:
case fixup_Hexagon_32:
// Leave these relocations alone as they are used for EH.
- return;
+ return false;
}
+ return false;
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 9d5c179a0fd9..69b1ba1528d0 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -2789,6 +2789,7 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
bool Is32BitSym, SMLoc IDLoc,
MCStreamer &Out,
const MCSubtargetInfo *STI) {
+ // FIXME: These expansions do not respect -mxgot.
MipsTargetStreamer &TOut = getTargetStreamer();
bool UseSrcReg = SrcReg != Mips::NoRegister;
warnIfNoMacro(IDLoc);
@@ -2808,8 +2809,12 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
// symbol in the final relocation is external and not modified with a
// constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16.
if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg &&
- Res.getConstant() == 0 && !Res.getSymA()->getSymbol().isInSection() &&
- !Res.getSymA()->getSymbol().isTemporary()) {
+ Res.getConstant() == 0 &&
+ !(Res.getSymA()->getSymbol().isInSection() ||
+ Res.getSymA()->getSymbol().isTemporary() ||
+ (Res.getSymA()->getSymbol().isELF() &&
+ cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() ==
+ ELF::STB_LOCAL))) {
const MCExpr *CallExpr =
MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
TOut.emitRRX(Mips::LW, DstReg, ABI.GetGlobalPtr(),
@@ -2865,6 +2870,85 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
return false;
}
+ if (inPicMode() && ABI.ArePtrs64bit()) {
+ MCValue Res;
+ if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) {
+ Error(IDLoc, "expected relocatable expression");
+ return true;
+ }
+ if (Res.getSymB() != nullptr) {
+ Error(IDLoc, "expected relocatable expression with only one symbol");
+ return true;
+ }
+
+ // The case where the result register is $25 is somewhat special. If the
+ // symbol in the final relocation is external and not modified with a
+ // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT_DISP.
+ if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg &&
+ Res.getConstant() == 0 &&
+ !(Res.getSymA()->getSymbol().isInSection() ||
+ Res.getSymA()->getSymbol().isTemporary() ||
+ (Res.getSymA()->getSymbol().isELF() &&
+ cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() ==
+ ELF::STB_LOCAL))) {
+ const MCExpr *CallExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
+ TOut.emitRRX(Mips::LD, DstReg, ABI.GetGlobalPtr(),
+ MCOperand::createExpr(CallExpr), IDLoc, STI);
+ return false;
+ }
+
+ // The remaining cases are:
+ // Small offset: ld $tmp, %got_disp(symbol)($gp)
+ // >daddiu $tmp, $tmp, offset
+ // >daddu $rd, $tmp, $rs
+ // The instructions marked with a '>' may be omitted if they are redundant. If
+ // this happens then the last instruction must use $rd as the result
+ // register.
+ const MipsMCExpr *GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP,
+ Res.getSymA(),
+ getContext());
+ const MCExpr *LoExpr = nullptr;
+ if (Res.getConstant() != 0) {
+ // Symbols fully resolve with just the %got_disp(symbol) but we
+ // must still account for any offset to the symbol for
+ // expressions like symbol+8.
+ LoExpr = MCConstantExpr::create(Res.getConstant(), getContext());
+
+ // FIXME: Offsets greater than 16 bits are not yet implemented.
+ // FIXME: The correct range is a 32-bit sign-extended number.
+ if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) {
+ Error(IDLoc, "macro instruction uses large offset, which is not "
+ "currently supported");
+ return true;
+ }
+ }
+
+ unsigned TmpReg = DstReg;
+ if (UseSrcReg &&
+ getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg,
+ SrcReg)) {
+ // If $rs is the same as $rd, we need to use AT.
+ // If it is not available we exit.
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+ TmpReg = ATReg;
+ }
+
+ TOut.emitRRX(Mips::LD, TmpReg, ABI.GetGlobalPtr(),
+ MCOperand::createExpr(GotExpr), IDLoc, STI);
+
+ if (LoExpr)
+ TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
+ IDLoc, STI);
+
+ if (UseSrcReg)
+ TOut.emitRRR(Mips::DADDu, DstReg, TmpReg, SrcReg, IDLoc, STI);
+
+ return false;
+ }
+
const MipsMCExpr *HiExpr =
MipsMCExpr::create(MipsMCExpr::MEK_HI, SymExpr, getContext());
const MipsMCExpr *LoExpr =
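The new 64-bit PIC branch of loadAndAddSymbolAddress above expands the address-load macro into at most three instructions and rejects offsets outside the signed 16-bit range. Below is a minimal standalone sketch of that decision logic; it returns the textual expansion instead of emitting MCInsts through TOut.emitRRX/emitRRR, and the function name and string output are purely illustrative.

#include <cstdint>
#include <string>

// Illustrative model of the expansion chosen above for N64 PIC:
//   ld     $tmp, %got_disp(symbol)($gp)
//   daddiu $tmp, $tmp, offset   (only if the offset is non-zero)
//   daddu  $rd, $tmp, $rs       (only if a source register is used)
// Offsets that do not fit a signed 16-bit immediate are rejected, matching
// the "macro instruction uses large offset" diagnostic.
std::string expandLoadAddress(const std::string &Sym, int64_t Offset,
                              bool UseSrcReg) {
  if (Offset < -0x8000 || Offset > 0x7fff)
    return "<error: offset does not fit in a signed 16-bit immediate>";
  std::string Seq = "ld $tmp, %got_disp(" + Sym + ")($gp)\n";
  if (Offset != 0)
    Seq += "daddiu $tmp, $tmp, " + std::to_string(Offset) + "\n";
  if (UseSrcReg)
    Seq += "daddu $rd, $tmp, $rs\n";
  return Seq;
}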
diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td
index 6b7f39e9dd79..38b09d105ddd 100644
--- a/lib/Target/Mips/MicroMips64r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -548,3 +548,15 @@ def : MipsInstAlias<"dnegu $rt, $rs",
def : MipsInstAlias<"dnegu $rt",
(DSUBU_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>,
ISA_MICROMIPS64R6;
+def : MipsInstAlias<"dsll $rd, $rt, $rs",
+ (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt,
+ GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6;
+def : MipsInstAlias<"dsrl $rd, $rt, $rs",
+ (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt,
+ GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6;
+def : MipsInstAlias<"dsrl $rd, $rt",
+ (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd,
+ GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6;
+def : MipsInstAlias<"dsll $rd, $rt",
+ (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd,
+ GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 99025fe1341d..3dba7ce30cad 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -748,9 +748,6 @@ let AdditionalPredicates = [NotInMicroMips] in {
defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi64, GPR64Opnd, imm64>,
GPR_64;
}
-def : MipsInstAlias<"dsll $rd, $rt, $rs",
- (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
- ISA_MIPS3;
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"dneg $rt, $rs",
(DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>,
@@ -793,9 +790,18 @@ def : MipsInstAlias<"dsra $rd, $rt, $rs",
(DSRAV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
let AdditionalPredicates = [NotInMicroMips] in {
+ def : MipsInstAlias<"dsll $rd, $rt, $rs",
+ (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
+ ISA_MIPS3;
def : MipsInstAlias<"dsrl $rd, $rt, $rs",
(DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
+ def : MipsInstAlias<"dsrl $rd, $rt",
+ (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>,
+ ISA_MIPS3;
+ def : MipsInstAlias<"dsll $rd, $rt",
+ (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>,
+ ISA_MIPS3;
// Two operand (implicit 0 selector) versions:
def : MipsInstAlias<"dmtc0 $rt, $rd",
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 5d82571ff94f..4a34e3101cb8 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -564,7 +564,7 @@ Iter Filler::replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch,
// For given opcode returns opcode of corresponding instruction with short
// delay slot.
-// For the pseudo TAILCALL*_MM instrunctions return the short delay slot
+// For the pseudo TAILCALL*_MM instructions return the short delay slot
// form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range
// that is too short to make use of for tail calls.
static int getEquivalentCallShort(int Opcode) {
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 02102d6b22f4..a6ec9fb2e598 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -364,18 +364,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
- if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) {
- setOperationAction(ISD::ADDC, MVT::i32, Expand);
- setOperationAction(ISD::ADDE, MVT::i32, Expand);
- }
-
- setOperationAction(ISD::ADDC, MVT::i64, Expand);
- setOperationAction(ISD::ADDE, MVT::i64, Expand);
- setOperationAction(ISD::SUBC, MVT::i32, Expand);
- setOperationAction(ISD::SUBE, MVT::i32, Expand);
- setOperationAction(ISD::SUBC, MVT::i64, Expand);
- setOperationAction(ISD::SUBE, MVT::i64, Expand);
-
// Operations not directly supported by Mips.
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
@@ -481,7 +469,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::AssertZext);
setTargetDAGCombine(ISD::SHL);
@@ -936,130 +923,14 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
}
}
-static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG,
- const MipsSubtarget &Subtarget) {
- // ROOTNode must have a multiplication as an operand for the match to be
- // successful.
- if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL &&
- ROOTNode->getOperand(1).getOpcode() != ISD::MUL)
- return SDValue();
-
- // We don't handle vector types here.
- if (ROOTNode->getValueType(0).isVector())
- return SDValue();
-
- // For MIPS64, madd / msub instructions are inefficent to use with 64 bit
- // arithmetic. E.g.
- // (add (mul a b) c) =>
- // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in
- // MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32)
- // or
- // MIPS64R2: (dins (mflo res) (mfhi res) 32 32)
- //
- // The overhead of setting up the Hi/Lo registers and reassembling the
- // result makes this a dubious optimzation for MIPS64. The core of the
- // problem is that Hi/Lo contain the upper and lower 32 bits of the
- // operand and result.
- //
- // It requires a chain of 4 add/mul for MIPS64R2 to get better code
- // density than doing it naively, 5 for MIPS64. Additionally, using
- // madd/msub on MIPS64 requires the operands actually be 32 bit sign
- // extended operands, not true 64 bit values.
- //
- // FIXME: For the moment, disable this completely for MIPS64.
- if (Subtarget.hasMips64())
- return SDValue();
-
- SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
- ? ROOTNode->getOperand(0)
- : ROOTNode->getOperand(1);
-
- SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
- ? ROOTNode->getOperand(1)
- : ROOTNode->getOperand(0);
-
- // Transform this to a MADD only if the user of this node is the add.
- // If there are other users of the mul, this function returns here.
- if (!Mult.hasOneUse())
- return SDValue();
-
- // maddu and madd are unusual instructions in that on MIPS64 bits 63..31
- // must be in canonical form, i.e. sign extended. For MIPS32, the operands
- // of the multiply must have 32 or more sign bits, otherwise we cannot
- // perform this optimization. We have to check this here as we're performing
- // this optimization pre-legalization.
- SDValue MultLHS = Mult->getOperand(0);
- SDValue MultRHS = Mult->getOperand(1);
- unsigned LHSSB = CurDAG.ComputeNumSignBits(MultLHS);
- unsigned RHSSB = CurDAG.ComputeNumSignBits(MultRHS);
-
- if (LHSSB < 32 || RHSSB < 32)
- return SDValue();
-
- APInt HighMask =
- APInt::getHighBitsSet(Mult->getValueType(0).getScalarSizeInBits(), 32);
- bool IsUnsigned = CurDAG.MaskedValueIsZero(Mult->getOperand(0), HighMask) &&
- CurDAG.MaskedValueIsZero(Mult->getOperand(1), HighMask) &&
- CurDAG.MaskedValueIsZero(AddOperand, HighMask);
-
- // Initialize accumulator.
- SDLoc DL(ROOTNode);
- SDValue TopHalf;
- SDValue BottomHalf;
- BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
- CurDAG.getIntPtrConstant(0, DL));
-
- TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
- CurDAG.getIntPtrConstant(1, DL));
- SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
- BottomHalf,
- TopHalf);
-
- // Create MipsMAdd(u) / MipsMSub(u) node.
- bool IsAdd = ROOTNode->getOpcode() == ISD::ADD;
- unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd)
- : (IsUnsigned ? MipsISD::MSubu : MipsISD::MSub);
- SDValue MAddOps[3] = {
- CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)),
- CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn};
- EVT VTs[2] = {MVT::i32, MVT::i32};
- SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps);
-
- SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
- SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
- SDValue Combined =
- CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi);
- return Combined;
-}
-
-static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget &Subtarget) {
- // (sub v0 (mul v1, v2)) => (msub v1, v2, v0)
- if (DCI.isBeforeLegalizeOps()) {
- if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
- !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64)
- return performMADD_MSUBCombine(N, DAG, Subtarget);
-
- return SDValue();
- }
-
- return SDValue();
-}
-
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
- // (add v0 (mul v1, v2)) => (madd v1, v2, v0)
- if (DCI.isBeforeLegalizeOps()) {
- if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
- !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64)
- return performMADD_MSUBCombine(N, DAG, Subtarget);
+ // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
+ if (DCI.isBeforeLegalizeOps())
return SDValue();
- }
- // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
SDValue Add = N->getOperand(1);
if (Add.getOpcode() != ISD::ADD)
@@ -1187,8 +1058,6 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performAssertZextCombine(N, DAG, DCI, Subtarget);
case ISD::SHL:
return performSHLCombine(N, DAG, DCI, Subtarget);
- case ISD::SUB:
- return performSUBCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
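The performMADD_MSUBCombine function deleted above documented why madd/msub is a poor fit for 64-bit arithmetic: HI:LO holds a 64-bit accumulator and the multiply operands must already be 32-bit sign-extended values. A conceptual model of that constraint, assuming nothing beyond standard C++ (the function name is illustrative):

#include <cstdint>

// Conceptual model of the madd the deleted combine targeted: HI:LO holds a
// 64-bit accumulator and the instruction adds the 64-bit product of two
// 32-bit (sign-extended) operands. If a 64-bit operand is not a sign-extended
// 32-bit value, the truncations below drop significant bits, which is why the
// combine required ComputeNumSignBits() >= 32 on both factors before firing.
int64_t maddModel(int64_t Acc, int64_t A, int64_t B) {
  int32_t ALo = static_cast<int32_t>(A); // operands as mult/madd consume them
  int32_t BLo = static_cast<int32_t>(B);
  return Acc + static_cast<int64_t>(ALo) * static_cast<int64_t>(BLo);
}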
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 4be26dd25dc0..49ae6dd4cd39 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -245,64 +245,46 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
}
}
-void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const {
- SDValue InFlag = Node->getOperand(2);
- unsigned Opc = InFlag.getOpcode();
+void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
+ SDValue CmpLHS, const SDLoc &DL,
+ SDNode *Node) const {
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu;
+ if (Subtarget->isGP64bit()) {
+ SLTuOp = Mips::SLTu64;
+ ADDuOp = Mips::DADDu;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
- // In the base case, we can rely on the carry bit from the addsc
- // instruction.
- if (Opc == ISD::ADDC) {
- SDValue Ops[3] = {LHS, RHS, InFlag};
- CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops);
- return;
+ SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops);
+
+ if (Subtarget->isGP64bit()) {
+ // On 64-bit targets, sltu produces an i64 but our backend currently says
+ // that SLTu64 produces an i32. We need to fix this in the long run but for
+ // now, just make the DAG type-correct by asserting the upper bits are zero.
+ Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
+ CurDAG->getTargetConstant(0, DL, VT),
+ SDValue(Carry, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL,
+ VT));
}
- assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!");
-
- // The more complex case is when there is a chain of ISD::ADDE nodes like:
- // (adde (adde (adde (addc a b) c) d) e).
- //
- // The addwc instruction does not write to the carry bit, instead it writes
- // to bit 20 of the dsp control register. To match this series of nodes, each
- // intermediate adde node must be expanded to write the carry bit before the
- // addition.
-
- // Start by reading the overflow field for addsc and moving the value to the
- // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP
- // corresponds to reading/writing the entire control register to/from a GPR.
-
- SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32);
-
- SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32);
-
- SDNode *DSPCtrlField =
- CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag);
-
- SDNode *Carry = CurDAG->getMachineNode(
- Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne);
+ // Generate a second addition only if we know that RHS is not a
+ // constant-zero node.
+ SDNode *AddCarry = Carry;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C || C->getZExtValue())
+ AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
- SDValue Ops[4] = {SDValue(DSPCtrlField, 0),
- CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne,
- SDValue(Carry, 0)};
- SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops);
-
- // My reading of the the MIPS DSP 3.01 specification isn't as clear as I
- // would like about whether bit 20 always gets overwritten by addwc.
- // Hence take an extremely conservative view and presume it's sticky. We
- // therefore need to clear it.
-
- SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
-
- SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)};
- SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps);
-
- SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue,
- SDValue(DSPCtrlFinal, 0), CstOne);
-
- SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)};
- CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands);
+ CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0));
}
/// Match frameindex
@@ -783,8 +765,19 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
switch(Opcode) {
default: break;
+ case ISD::SUBE: {
+ SDValue InFlag = Node->getOperand(2);
+ unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu;
+ selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node);
+ return true;
+ }
+
case ISD::ADDE: {
- selectAddE(Node, DL);
+ if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
+ break;
+ SDValue InFlag = Node->getOperand(2);
+ unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu;
+ selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node);
return true;
}
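selectAddESubE above drops the glued carry chain and rebuilds the carry with an unsigned set-on-less-than. A minimal sketch of the underlying arithmetic, shown as a 64-bit add split into 32-bit halves (the function and variable names are illustrative, not part of the backend):

#include <cstdint>
#include <utility>

// Carry recovery via the sltu idiom: the carry out of the low-half addition
// is 1 exactly when the 32-bit sum wrapped, i.e. when it compares
// unsigned-less-than one of its addends. The borrow for SUBE is rebuilt the
// same way from an unsigned comparison of the original operands.
std::pair<uint32_t, uint32_t> add64(uint32_t LoA, uint32_t HiA,
                                    uint32_t LoB, uint32_t HiB) {
  uint32_t Lo = LoA + LoB;
  uint32_t Carry = Lo < LoB ? 1 : 0; // sltu $carry, $lo, $lob
  uint32_t Hi = HiA + HiB + Carry;   // addu $hi, $hia, $hib ; addu $hi, $hi, $carry
  return {Lo, Hi};
}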
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 6f38289c5a45..f89a350cab04 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -41,7 +41,8 @@ private:
const SDLoc &dl, EVT Ty, bool HasLo,
bool HasHi);
- void selectAddE(SDNode *Node, const SDLoc &DL) const;
+ void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
+ const SDLoc &DL, SDNode *Node) const;
bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const;
bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset,
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index b57bceb3c837..06a97b9d123e 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -179,6 +179,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::MUL);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -419,6 +421,163 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
return MipsTargetLowering::LowerOperation(Op, DAG);
}
+// selectMADD -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than ADDENode or ADDCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDLoc DL(ADDENode);
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
+ ADDCNode->getOperand(1),
+ ADDENode->getOperand(1));
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty()) {
+ SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
+ }
+ if (!SDValue(ADDENode, 0).use_empty()) {
+ SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+// selectMSUB -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
+ // SUBENode's second operand must be a flag output of a SUBC node in order
+ // for the matching to be successful.
+ SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than SUBENode or SUBCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDLoc DL(SUBENode);
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
+ SUBCNode->getOperand(0),
+ SUBENode->getOperand(0));
+
+ // create MipsSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+ SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty()) {
+ SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
+ }
+ if (!SDValue(SUBENode, 0).use_empty()) {
+ SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
+ N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
@@ -661,6 +820,19 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMSUB(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT,
EVT ShiftTy, SelectionDAG &DAG) {
// Clear the upper (64 - VT.sizeInBits) bits.
@@ -938,12 +1110,16 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
SDValue Val;
switch (N->getOpcode()) {
+ case ISD::ADDE:
+ return performADDECombine(N, DAG, DCI, Subtarget);
case ISD::AND:
Val = performANDCombine(N, DAG, DCI, Subtarget);
break;
case ISD::OR:
Val = performORCombine(N, DAG, DCI, Subtarget);
break;
+ case ISD::SUBE:
+ return performSUBECombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
return performMULCombine(N, DAG, DCI, this);
case ISD::SHL:
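selectMADD and selectMSUB above match a [SU]MUL_LOHI feeding an ADDC/ADDE (or SUBC/SUBE) pair and fold the whole subgraph into one accumulating multiply. A scalar model of the matched addition pattern follows; the names are purely illustrative:

#include <cstdint>
#include <utility>

// (addc multLo, Lo0), (adde multHi, Hi0) in scalar form: a 64-bit accumulator
// kept in two 32-bit halves plus the full 64-bit product of two 32-bit
// values. This is the computation a single MAdd/MAddu node performs in HI:LO.
std::pair<uint32_t, uint32_t> maddPattern(uint32_t Lo0, uint32_t Hi0,
                                          uint32_t A, uint32_t B) {
  uint64_t Prod = static_cast<uint64_t>(A) * B;                  // UMUL_LOHI
  uint32_t Lo = Lo0 + static_cast<uint32_t>(Prod);               // addc
  uint32_t Carry = Lo < Lo0 ? 1 : 0;
  uint32_t Hi = Hi0 + static_cast<uint32_t>(Prod >> 32) + Carry; // adde
  return {Lo, Hi};
}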
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index dd7707084948..a64d95512a4a 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -141,9 +141,9 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
}
}
-void NVPTXTTIImpl::getUnrollingPreferences(Loop *L,
+void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
- BaseT::getUnrollingPreferences(L, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP);
// Enable partial unrolling and runtime unrolling, but reduce the
// threshold. This partially unrolls small loops which are often
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 03075b550429..f987892ba675 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -61,7 +61,8 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 6d7eb786a683..7393f3d7a08a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -131,10 +131,11 @@ public:
}
}
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override {
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override {
switch ((PPC::Fixups)Fixup.getKind()) {
- default: break;
+ default:
+ return false;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
// If the target symbol has a local entry point we must not attempt
@@ -147,10 +148,10 @@ public:
// and thus the shift to pack it.
unsigned Other = S->getOther() << 2;
if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
- IsResolved = false;
+ return true;
}
}
- break;
+ return false;
}
}
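The rewritten hook answers one question: must this branch fixup be kept as a relocation because the callee has a separate local entry point? A sketch of that test on the raw ELF st_other byte; the mask value is an assumption about ELF::STO_PPC64_LOCAL_MASK and the helper name is illustrative:

#include <cstdint>

// Assumed value of ELF::STO_PPC64_LOCAL_MASK: bits 5-7 of st_other encode a
// non-zero distance between the global and local entry points under ELFv2.
constexpr uint8_t kStoPPC64LocalMask = 0xe0;

// A direct branch to such a symbol must reach the global entry point, so the
// fixup is forced to stay a relocation for the linker to resolve.
bool mustForceBranchRelocation(uint8_t StOther) {
  return (StOther & kStoPPC64LocalMask) != 0;
}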
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index ae43e59d3cb1..dce443997ea5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -17,35 +17,31 @@
namespace llvm {
namespace PPC {
enum Fixups {
- // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b'
- // and 'bl'.
+ // 24-bit PC relative relocation for direct branches like 'b' and 'bl'.
fixup_ppc_br24 = FirstTargetFixupKind,
-
- /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
- /// branches.
+
+ /// 14-bit PC relative relocation for conditional branches.
fixup_ppc_brcond14,
-
- /// fixup_ppc_br24abs - 24-bit absolute relocation for direct branches
- /// like 'ba' and 'bla'.
+
+ /// 24-bit absolute relocation for direct branches like 'ba' and 'bla'.
fixup_ppc_br24abs,
- /// fixup_ppc_brcond14abs - 14-bit absolute relocation for conditional
- /// branches.
+ /// 14-bit absolute relocation for conditional branches.
fixup_ppc_brcond14abs,
- /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo)
- /// or ha16(_foo) for instrs like 'li' or 'addis'.
+ /// A 16-bit fixup corresponding to lo16(_foo) or ha16(_foo) for instrs like
+ /// 'li' or 'addis'.
fixup_ppc_half16,
-
- /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with
- /// implied 2 zero bits for instrs like 'std'.
+
+ /// A 14-bit fixup corresponding to lo16(_foo) with implied 2 zero bits for
+ /// instrs like 'std'.
fixup_ppc_half16ds,
- /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
- /// to __tls_get_addr for the TLS general and local dynamic models,
- /// or inserts the thread-pointer register number.
+ /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the
+ /// TLS general and local dynamic models, or inserts the thread-pointer
+ /// register number.
fixup_ppc_nofixup,
-
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 6d591ca964a6..d5506277ca88 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -219,11 +219,11 @@ bool PPCMachObjectWriter::recordScatteredRelocation(
const MCSymbol *SB = &B->getSymbol();
if (!SB->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
+ report_fatal_error("symbol '" + SB->getName() +
"' can not be undefined in a subtraction expression");
// FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h
- Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
+ Value2 = Writer->getSymbolAddress(*SB, Layout);
FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
}
// FIXME: does FixedValue get used??
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 07c9c1f9f84c..ad92ac8ce120 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPC_H
#define LLVM_LIB_TARGET_POWERPC_PPC_H
+#include "llvm/Support/CodeGen.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
// GCC #defines PPC on Linux but we use it as our namespace name
@@ -28,7 +29,7 @@ namespace llvm {
class AsmPrinter;
class MCInst;
- FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM);
+ FunctionPass *createPPCCTRLoops();
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
@@ -41,7 +42,7 @@ namespace llvm {
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCQPXLoadSplatPass();
- FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
FunctionPass *createPPCExpandISELPass();
@@ -51,6 +52,7 @@ namespace llvm {
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
+ void initializePPCTLSDynamicCallPass(PassRegistry &);
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 24bc027f8106..094d3e6a61b5 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -24,12 +24,14 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -81,10 +83,7 @@ namespace {
public:
static char ID;
- PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
- initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
- }
- PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ PPCCTRLoops() : FunctionPass(ID) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
@@ -99,16 +98,18 @@ namespace {
}
private:
- bool mightUseCTR(const Triple &TT, BasicBlock *BB);
+ bool mightUseCTR(BasicBlock *BB);
bool convertToCTRLoop(Loop *L);
private:
- PPCTargetMachine *TM;
+ const PPCTargetMachine *TM;
+ const PPCSubtarget *STI;
+ const PPCTargetLowering *TLI;
+ const DataLayout *DL;
+ const TargetLibraryInfo *LibInfo;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
DominatorTree *DT;
- const TargetLibraryInfo *LibInfo;
bool PreserveLCSSA;
};
@@ -149,9 +150,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
-FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
- return new PPCCTRLoops(TM);
-}
+FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
@@ -169,6 +168,14 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ TM = &TPC->getTM<PPCTargetMachine>();
+ STI = TM->getSubtargetImpl(F);
+ TLI = STI->getTargetLowering();
+
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -198,8 +205,7 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
// Determining the address of a TLS variable results in a function call in
// certain TLS models.
-static bool memAddrUsesCTR(const PPCTargetMachine *TM,
- const Value *MemAddr) {
+static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
const auto *GV = dyn_cast<GlobalValue>(MemAddr);
if (!GV) {
// Recurse to check for constants that refer to TLS global variables.
@@ -213,35 +219,35 @@ static bool memAddrUsesCTR(const PPCTargetMachine *TM,
if (!GV->isThreadLocal())
return false;
- if (!TM)
- return true;
- TLSModel::Model Model = TM->getTLSModel(GV);
+ TLSModel::Model Model = TM.getTLSModel(GV);
return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}
-bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
+// Loop through the inline asm constraints and look for something that clobbers
+// ctr.
+static bool asmClobbersCTR(InlineAsm *IA) {
+ InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+ for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+ InlineAsm::ConstraintInfo &C = CIV[i];
+ if (C.Type != InlineAsm::isInput)
+ for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+ if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+ return true;
+ }
+ return false;
+}
+
+bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ // Inline ASM is okay, unless it clobbers the ctr register.
if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
- // Inline ASM is okay, unless it clobbers the ctr register.
- InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
- for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
- InlineAsm::ConstraintInfo &C = CIV[i];
- if (C.Type != InlineAsm::isInput)
- for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
- if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
- return true;
- }
-
+ if (asmClobbersCTR(IA))
+ return true;
continue;
}
- if (!TM)
- return true;
- const TargetLowering *TLI =
- TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
-
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
// sin, cos, exp and log are always calls.
@@ -380,9 +386,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
}
if (Opcode) {
- auto &DL = CI->getModule()->getDataLayout();
- MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(),
- true);
+ MVT VTy = TLI->getSimpleValueType(
+ *DL, CI->getArgOperand(0)->getType(), true);
if (VTy == MVT::Other)
return true;
@@ -406,17 +411,17 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
CastInst *CI = cast<CastInst>(J);
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
- isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
+ isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
return true;
- } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ } else if (isLargeIntegerTy(!TM->isPPC64(),
J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::UDiv ||
J->getOpcode() == Instruction::SDiv ||
J->getOpcode() == Instruction::URem ||
J->getOpcode() == Instruction::SRem)) {
return true;
- } else if (TT.isArch32Bit() &&
+ } else if (!TM->isPPC64() &&
isLargeIntegerTy(false, J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::Shl ||
J->getOpcode() == Instruction::AShr ||
@@ -428,16 +433,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
// On PowerPC, indirect jumps use the counter register.
return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
- if (!TM)
- return true;
- const TargetLowering *TLI =
- TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
-
if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
}
- if (TM->getSubtargetImpl(*BB->getParent())->getTargetLowering()->useSoftFloat()) {
+ if (STI->useSoftFloat()) {
switch(J->getOpcode()) {
case Instruction::FAdd:
case Instruction::FSub:
@@ -456,7 +456,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
}
for (Value *Operand : J->operands())
- if (memAddrUsesCTR(TM, Operand))
+ if (memAddrUsesCTR(*TM, Operand))
return true;
}
@@ -466,11 +466,6 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
bool MadeChange = false;
- const Triple TT =
- Triple(L->getHeader()->getParent()->getParent()->getTargetTriple());
- if (!TT.isArch32Bit() && !TT.isArch64Bit())
- return MadeChange; // Unknown arch. type.
-
// Process nested loops first.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
MadeChange |= convertToCTRLoop(*I);
@@ -495,7 +490,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// want to use the counter register if the loop contains calls.
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
I != IE; ++I)
- if (mightUseCTR(TT, *I))
+ if (mightUseCTR(*I))
return MadeChange;
SmallVector<BasicBlock*, 4> ExitingBlocks;
@@ -517,7 +512,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
} else if (!SE->isLoopInvariant(EC, L))
continue;
- if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
+ if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
continue;
// We now have a loop-invariant count of loop iterations (which is not the
@@ -571,7 +566,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// preheader, then we can use it (except if the preheader contains a use of
// the CTR register because some such uses might be reordered by the
// selection DAG after the mtctr instruction).
- if (!Preheader || mightUseCTR(TT, Preheader))
+ if (!Preheader || mightUseCTR(Preheader))
Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (!Preheader)
return MadeChange;
@@ -582,10 +577,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// selected branch.
MadeChange = true;
- SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
+ SCEVExpander SCEVE(*SE, *DL, "loopcnt");
LLVMContext &C = SE->getContext();
- Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
- Type::getInt32Ty(C);
+ Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
if (!ExitCount->getType()->isPointerTy() &&
ExitCount->getType() != CountType)
ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
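The inline-asm check hoisted into asmClobbersCTR above scans every non-input constraint code for a case-insensitive "{ctr}" clobber. A standalone rendering of that scan, with plain standard-library containers assumed in place of InlineAsm::ConstraintInfoVector:

#include <algorithm>
#include <cctype>
#include <string>
#include <vector>

// Case-insensitive string comparison, standing in for StringRef::equals_lower.
static bool equalsLower(std::string A, const std::string &B) {
  std::transform(A.begin(), A.end(), A.begin(),
                 [](unsigned char C) { return std::tolower(C); });
  return A == B;
}

// Walk the constraint codes of every non-input constraint and report whether
// any of them names the counter register.
bool clobbersCTR(const std::vector<std::vector<std::string>> &NonInputCodes) {
  for (const auto &Codes : NonInputCodes)
    for (const auto &Code : Codes)
      if (equalsLower(Code, "{ctr}"))
        return true;
  return false;
}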
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index afd2e87078a9..535b9deaefac 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -114,8 +114,8 @@ namespace {
unsigned GlobalBaseReg;
public:
- explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm) {}
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm) {}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
@@ -5116,6 +5116,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
- return new PPCDAGToDAGISel(TM);
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PPCDAGToDAGISel(TM, OptLevel);
}
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 31c50785c2ee..5f8085f4626e 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -52,6 +52,7 @@ namespace {
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+ bool NeedFence = true;
bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
@@ -62,6 +63,16 @@ protected:
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
MI.getOpcode() != PPC::ADDItlsldLADDR32) {
+
+ // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
+ // as scheduling fences, we skip creating fences if we already
+ // have existing ADJCALLSTACKDOWN/UP to avoid nesting,
+ // which causes verification error with -verify-machineinstrs.
+ if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN)
+ NeedFence = false;
+ else if (MI.getOpcode() == PPC::ADJCALLSTACKUP)
+ NeedFence = true;
+
++I;
continue;
}
@@ -96,11 +107,15 @@ protected:
break;
}
- // Don't really need to save data to the stack - the clobbered
+ // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr
+ // as a scheduling fence to keep the call from being scheduled before
+ // mflr in the prologue, where it would clobber the address in LR (PR25839).
+ // We don't really need to save data to the stack - the clobbered
// registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr)
// gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR).
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
- .addImm(0);
+ if (NeedFence)
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
+ .addImm(0);
// Expand into two ops built prior to the existing instruction.
MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
@@ -116,7 +131,8 @@ protected:
.addReg(GPR3));
Call->addOperand(MI.getOperand(3));
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
+ if (NeedFence)
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
.addReg(GPR3);
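The NeedFence flag introduced above ensures the ADJCALLSTACKDOWN/UP pair emitted around the __tls_get_addr call never nests inside an existing pair. A toy model of that bookkeeping over a block of opcodes; the enum and function exist only for illustration:

#include <vector>

// Fences are suppressed while we are inside an existing ADJCALLSTACKDOWN/UP
// bracket, so the pseudo-ops never nest (which trips -verify-machineinstrs).
enum Op { AdjDown, AdjUp, TlsAddr, Other };

int countFencesEmitted(const std::vector<Op> &Block) {
  bool NeedFence = true;
  int Emitted = 0;
  for (Op O : Block) {
    if (O == AdjDown)
      NeedFence = false;           // already inside a bracket
    else if (O == AdjUp)
      NeedFence = true;            // bracket closed, fences allowed again
    else if (O == TlsAddr && NeedFence)
      Emitted += 2;                // one ADJCALLSTACKDOWN and one ADJCALLSTACKUP
  }
  return Emitted;
}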
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index a88a6541e8d0..fe092cc3b858 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -93,6 +93,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
+ initializePPCTLSDynamicCallPass(PR);
}
/// Return the datalayout string of a subtarget.
@@ -336,7 +337,7 @@ bool PPCPassConfig::addPreISel() {
addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createPPCCTRLoops(getPPCTargetMachine()));
+ addPass(createPPCCTRLoops());
return false;
}
@@ -352,7 +353,7 @@ bool PPCPassConfig::addILPOpts() {
bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
- addPass(createPPCISelDag(getPPCTargetMachine()));
+ addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel()));
#ifndef NDEBUG
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 5eb6ba785d1b..2dc3828334ac 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -41,6 +41,7 @@ public:
~PPCTargetMachine() override;
const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
+ const PPCSubtarget *getSubtargetImpl() const = delete;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 3dbd5f5b9a92..6110706b01b9 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -189,7 +189,7 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-void PPCTTIImpl::getUnrollingPreferences(Loop *L,
+void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
if (ST->getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
@@ -201,7 +201,7 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
UP.AllowExpensiveTripCount = true;
}
- BaseT::getUnrollingPreferences(L, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP);
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 758c335def08..99ca6394d1be 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -52,7 +52,8 @@ public:
Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
/// @}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index c72b47b09085..d4454c271f5a 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -203,13 +203,14 @@ namespace {
return InfosBE[Kind - FirstTargetFixupKind];
}
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override {
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override {
switch ((Sparc::Fixups)Fixup.getKind()) {
- default: break;
+ default:
+ return false;
case Sparc::fixup_sparc_wplt30:
if (Target.getSymA()->getSymbol().isTemporary())
- return;
+ return false;
case Sparc::fixup_sparc_tls_gd_hi22:
case Sparc::fixup_sparc_tls_gd_lo10:
case Sparc::fixup_sparc_tls_gd_add:
@@ -227,7 +228,8 @@ namespace {
case Sparc::fixup_sparc_tls_ie_ldx:
case Sparc::fixup_sparc_tls_ie_add:
case Sparc::fixup_sparc_tls_le_hix22:
- case Sparc::fixup_sparc_tls_le_lox10: IsResolved = false; break;
+ case Sparc::fixup_sparc_tls_le_lox10:
+ return true;
}
}
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index ad05779a9f64..ee23692ad1db 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -61,6 +61,7 @@ enum RegisterKind {
VR64Reg,
VR128Reg,
AR32Reg,
+ CR64Reg,
};
enum MemoryKind {
@@ -343,6 +344,7 @@ public:
bool isVF128() const { return false; }
bool isVR128() const { return isReg(VR128Reg); }
bool isAR32() const { return isReg(AR32Reg); }
+ bool isCR64() const { return isReg(CR64Reg); }
bool isAnyReg() const { return (isReg() || isImm(0, 15)); }
bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
@@ -379,7 +381,8 @@ private:
RegGR,
RegFP,
RegV,
- RegAR
+ RegAR,
+ RegCR
};
struct Register {
RegisterGroup Group;
@@ -487,6 +490,9 @@ public:
OperandMatchResultTy parseAR32(OperandVector &Operands) {
return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg);
}
+ OperandMatchResultTy parseCR64(OperandVector &Operands) {
+ return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg);
+ }
OperandMatchResultTy parseAnyReg(OperandVector &Operands) {
return parseAnyRegister(Operands);
}
@@ -648,6 +654,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
Reg.Group = RegV;
else if (Prefix == 'a' && Reg.Num < 16)
Reg.Group = RegAR;
+ else if (Prefix == 'c' && Reg.Num < 16)
+ Reg.Group = RegCR;
else
return Error(Reg.StartLoc, "invalid register");
@@ -741,6 +749,10 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
Kind = AR32Reg;
RegNo = SystemZMC::AR32Regs[Reg.Num];
}
+ else if (Reg.Group == RegCR) {
+ Kind = CR64Reg;
+ RegNo = SystemZMC::CR64Regs[Reg.Num];
+ }
else {
return MatchOperand_ParseFail;
}
@@ -1056,6 +1068,8 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = SystemZMC::VR128Regs[Reg.Num];
else if (Reg.Group == RegAR)
RegNo = SystemZMC::AR32Regs[Reg.Num];
+ else if (Reg.Group == RegCR)
+ RegNo = SystemZMC::CR64Regs[Reg.Num];
StartLoc = Reg.StartLoc;
EndLoc = Reg.EndLoc;
return false;
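The assembler changes above teach register parsing a fifth prefix: %c0-%c15 now select the control-register group. A hypothetical standalone version of that prefix dispatch (the enum, function, and lack of digit validation are all illustration-only simplifications):

#include <cstdlib>
#include <string>

enum class RegGroup { GR, FP, V, AR, CR, Invalid };

// Split a token such as "%c3" into its letter prefix and number; 'c' with a
// number below 16 maps to the new control-register group, mirroring the
// RegCR case added to SystemZAsmParser::parseRegister.
RegGroup classifyRegister(const std::string &Tok) {
  if (Tok.size() < 3 || Tok[0] != '%')
    return RegGroup::Invalid;
  char Prefix = Tok[1];
  unsigned Num = std::strtoul(Tok.c_str() + 2, nullptr, 10);
  if (Prefix == 'r' && Num < 16) return RegGroup::GR;
  if (Prefix == 'f' && Num < 16) return RegGroup::FP;
  if (Prefix == 'v' && Num < 32) return RegGroup::V;
  if (Prefix == 'a' && Num < 16) return RegGroup::AR;
  if (Prefix == 'c' && Num < 16) return RegGroup::CR;
  return RegGroup::Invalid;
}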
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 27fd70bc6092..8903b57ffd0b 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -162,6 +162,12 @@ static DecodeStatus DecodeAR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, SystemZMC::AR32Regs, 16);
}
+static DecodeStatus DecodeCR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::CR64Regs, 16);
+}
+
template<unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) {
if (!isUInt<N>(Imm))
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index dfea7e33fa15..727ab921daf9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -116,6 +116,13 @@ const unsigned SystemZMC::AR32Regs[16] = {
SystemZ::A12, SystemZ::A13, SystemZ::A14, SystemZ::A15
};
+const unsigned SystemZMC::CR64Regs[16] = {
+ SystemZ::C0, SystemZ::C1, SystemZ::C2, SystemZ::C3,
+ SystemZ::C4, SystemZ::C5, SystemZ::C6, SystemZ::C7,
+ SystemZ::C8, SystemZ::C9, SystemZ::C10, SystemZ::C11,
+ SystemZ::C12, SystemZ::C13, SystemZ::C14, SystemZ::C15
+};
+
unsigned SystemZMC::getFirstReg(unsigned Reg) {
static unsigned Map[SystemZ::NUM_TARGET_REGS];
static bool Initialized = false;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index d9926c7e4986..dbca3485290a 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -55,6 +55,7 @@ extern const unsigned VR32Regs[32];
extern const unsigned VR64Regs[32];
extern const unsigned VR128Regs[32];
extern const unsigned AR32Regs[16];
+extern const unsigned CR64Regs[16];
// Return the 0-based number of the first architectural register that
// contains the given LLVM register. E.g. R1D -> 1.
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index 74cf653b9d95..9b714157550d 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -67,6 +67,11 @@ We don't use ICM, STCM, or CLM.
--
+We don't use ADD (LOGICAL) HIGH, SUBTRACT (LOGICAL) HIGH,
+or COMPARE (LOGICAL) HIGH yet.
+
+--
+
DAGCombiner doesn't yet fold truncations of extended loads. Functions like:
unsigned long f (unsigned long x, unsigned short *y)
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index c5f324418da5..41300a1b6295 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -56,6 +56,7 @@ include "SystemZInstrVector.td"
include "SystemZInstrFP.td"
include "SystemZInstrHFP.td"
include "SystemZInstrDFP.td"
+include "SystemZInstrSystem.td"
def SystemZInstrInfo : InstrInfo {}
diff --git a/lib/Target/SystemZ/SystemZFeatures.td b/lib/Target/SystemZ/SystemZFeatures.td
index ffb0b8d1c861..c5faa0d62881 100644
--- a/lib/Target/SystemZ/SystemZFeatures.td
+++ b/lib/Target/SystemZ/SystemZFeatures.td
@@ -68,11 +68,21 @@ def FeaturePopulationCount : SystemZFeature<
"Assume that the population-count facility is installed"
>;
+def FeatureMessageSecurityAssist3 : SystemZFeature<
+ "message-security-assist-extension3", "MessageSecurityAssist3",
+ "Assume that the message-security-assist extension facility 3 is installed"
+>;
+
def FeatureMessageSecurityAssist4 : SystemZFeature<
"message-security-assist-extension4", "MessageSecurityAssist4",
"Assume that the message-security-assist extension facility 4 is installed"
>;
+def FeatureResetReferenceBitsMultiple : SystemZFeature<
+ "reset-reference-bits-multiple", "ResetReferenceBitsMultiple",
+ "Assume that the reset-reference-bits-multiple facility is installed"
+>;
+
def Arch9NewFeatures : SystemZFeatureList<[
FeatureDistinctOps,
FeatureFastSerialization,
@@ -81,7 +91,9 @@ def Arch9NewFeatures : SystemZFeatureList<[
FeatureInterlockedAccess1,
FeatureLoadStoreOnCond,
FeaturePopulationCount,
- FeatureMessageSecurityAssist4
+ FeatureMessageSecurityAssist3,
+ FeatureMessageSecurityAssist4,
+ FeatureResetReferenceBitsMultiple
]>;
//===----------------------------------------------------------------------===//
@@ -120,13 +132,19 @@ def FeatureDFPZonedConversion : SystemZFeature<
"Assume that the DFP zoned-conversion facility is installed"
>;
+def FeatureEnhancedDAT2 : SystemZFeature<
+ "enhanced-dat-2", "EnhancedDAT2",
+ "Assume that the enhanced-DAT facility 2 is installed"
+>;
+
def Arch10NewFeatures : SystemZFeatureList<[
FeatureExecutionHint,
FeatureLoadAndTrap,
FeatureMiscellaneousExtensions,
FeatureProcessorAssist,
FeatureTransactionalExecution,
- FeatureDFPZonedConversion
+ FeatureDFPZonedConversion,
+ FeatureEnhancedDAT2
]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 5f6115ed86a4..7620e06ccbc9 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2468,6 +2468,14 @@ class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let OpType = "reg";
}
+class UnaryTiedRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src),
+ mnemonic#"\t$R1", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let R2 = 0;
+}
+
class UnaryMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src),
@@ -2702,6 +2710,26 @@ class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode,
let AddedComplexity = 7;
}
+class SideEffectBinaryRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []>;
+
+class SideEffectBinaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []> {
+ let R3 = 0;
+ let M4 = 0;
+}
+
+class SideEffectBinaryRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []> {
+ let M3 = 0;
+}
+
class SideEffectBinaryIE<string mnemonic, bits<16> opcode,
Immediate imm1, Immediate imm2>
: InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2),
@@ -2729,6 +2757,10 @@ class SideEffectBinarySSf<string mnemonic, bits<8> opcode>
: InstSSf<opcode, (outs), (ins bdaddr12only:$BD1, bdladdr12onlylen8:$BDL2),
mnemonic##"\t$BD1, $BDL2", []>;
+class SideEffectBinarySSE<string mnemonic, bits<16> opcode>
+ : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2),
+ mnemonic#"\t$BD1, $BD2", []>;
+
class SideEffectBinaryMemMemRR<string mnemonic, bits<8> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs cls1:$R1, cls2:$R2), (ins cls1:$R1src, cls2:$R2src),
@@ -3612,6 +3644,22 @@ class SideEffectTernarySSc<string mnemonic, bits<8> opcode>
shift12only:$BD2, imm32zx4:$I3),
mnemonic##"\t$BDL1, $BD2, $I3", []>;
+class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R2, $R3", []> {
+ let M4 = 0;
+}
+
+class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R3, $R2", []> {
+ let M4 = 0;
+}
+
class SideEffectTernaryMemMemMemRRFb<string mnemonic, bits<16> opcode,
RegisterOperand cls1,
RegisterOperand cls2,
@@ -3630,6 +3678,13 @@ class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode,
: InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []>;
+multiclass SideEffectTernaryRRFcOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2> {
+ def "" : SideEffectTernaryRRFc<mnemonic, opcode, cls1, cls2, imm32zx4>;
+ def Opt : SideEffectBinaryRRFc<mnemonic, opcode, cls1, cls2>;
+}
+
class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
Immediate imm>
@@ -3720,6 +3775,18 @@ multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
}
}
+class SideEffectTernaryRS<string mnemonic, bits<8> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRSa<opcode, (outs),
+ (ins cls1:$R1, cls2:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []>;
+
+class SideEffectTernaryRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRSYa<opcode, (outs),
+ (ins cls1:$R1, cls2:$R3, bdaddr20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []>;
+
class SideEffectTernaryMemMemRS<string mnemonic, bits<8> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRSa<opcode, (outs cls1:$R1, cls2:$R3),
@@ -3997,6 +4064,35 @@ multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
VR128:$V4, imm32zx4:$M5, 0)>;
}
+class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R2, $R3, $M4", []>;
+
+multiclass SideEffectQuaternaryRRFaOptOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2,
+ RegisterOperand cls3> {
+ def "" : SideEffectQuaternaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+ def Opt : SideEffectTernaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+ def OptOpt : SideEffectBinaryRRFa<mnemonic, opcode, cls1, cls2>;
+}
+
+class SideEffectQuaternaryRRFb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R3, $R2, $M4", []>;
+
+multiclass SideEffectQuaternaryRRFbOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2,
+ RegisterOperand cls3> {
+ def "" : SideEffectQuaternaryRRFb<mnemonic, opcode, cls1, cls2, cls3>;
+ def Opt : SideEffectTernaryRRFb<mnemonic, opcode, cls1, cls2, cls3>;
+}
+
class SideEffectQuaternarySSe<string mnemonic, bits<8> opcode,
RegisterOperand cls>
: InstSSe<opcode, (outs),
@@ -4012,6 +4108,16 @@ class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let mayStore = 1;
}
+class CmpSwapRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr12only>
: InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
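The *Opt multiclasses added above encode a SystemZ assembler convention for instructions whose trailing operands may be omitted: the base record keeps the full operand list, while the Opt (and OptOpt) records drop trailing operands and force the corresponding encoding fields to zero. As a rough sketch (illustrative only; the real records are generated by the multiclass), the SSKE definition later in this patch

  defm SSKE : SideEffectTernaryRRFcOpt<"sske", 0xB22B, GR32, GR64>;

expands to approximately

  def SSKE    : SideEffectTernaryRRFc<"sske", 0xB22B, GR32, GR64, imm32zx4>;
  def SSKEOpt : SideEffectBinaryRRFc<"sske", 0xB22B, GR32, GR64>;  // M3 encoded as 0

which is why the scheduling files further down match these instructions with patterns like "SSKE(Opt)?$".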
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 9f5e6288348e..98f66c29ae64 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -883,6 +883,12 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
}
def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
+ // Addition to a high register.
+ def AHHHR : BinaryRRFa<"ahhhr", 0xB9C8, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def AHHLR : BinaryRRFa<"ahhlr", 0xB9D8, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Addition of signed 16-bit immediates.
defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
@@ -917,6 +923,12 @@ let Defs = [CC] in {
}
def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
+ // Addition to a high register.
+ def ALHHHR : BinaryRRFa<"alhhhr", 0xB9CA, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def ALHHLR : BinaryRRFa<"alhhlr", 0xB9DA, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Addition of signed 16-bit immediates.
def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>,
Requires<[FeatureDistinctOps]>;
@@ -927,6 +939,10 @@ let Defs = [CC] in {
def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>;
def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>;
+ // Addition of signed 32-bit immediates.
+ def ALSIH : BinaryRIL<"alsih", 0xCCA, null_frag, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+
// Addition of memory.
defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>;
def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>;
@@ -949,6 +965,10 @@ let Defs = [CC], Uses = [CC] in {
def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>;
}
+// Addition that does not modify the condition code.
+def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+
//===----------------------------------------------------------------------===//
// Subtraction
//===----------------------------------------------------------------------===//
@@ -961,6 +981,12 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, sub, GR64, GR64>;
+ // Subtraction from a high register.
+ def SHHHR : BinaryRRFa<"shhhr", 0xB9C9, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def SHHLR : BinaryRRFa<"shhlr", 0xB9D9, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Subtraction of memory.
defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>;
defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>;
@@ -976,6 +1002,12 @@ let Defs = [CC] in {
def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, subc, GR64, GR64>;
+ // Subtraction from a high register.
+ def SLHHHR : BinaryRRFa<"slhhhr", 0xB9CB, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def SLHHLR : BinaryRRFa<"slhhlr", 0xB9DB, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Subtraction of unsigned 32-bit immediates. These don't match
// subc because we prefer addc for constants.
def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>;
@@ -1298,6 +1330,12 @@ let Defs = [CC], CCValues = 0xE in {
def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>;
+ // Comparison with a high register.
+ def CHHR : CompareRRE<"chhr", 0xB9CD, null_frag, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def CHLR : CompareRRE<"chlr", 0xB9DD, null_frag, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Comparison with a signed 16-bit immediate. CHIMux expands to CHI or CIH,
// depending on the choice of register.
def CHIMux : CompareRIPseudo<z_scmp, GRX32, imm32sx16>,
@@ -1344,6 +1382,12 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>;
+ // Comparison with a high register.
+ def CLHHR : CompareRRE<"clhhr", 0xB9CF, null_frag, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def CLHLR : CompareRRE<"clhlr", 0xB9DF, null_frag, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI
// or CLIH, depending on the choice of register.
def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>,
@@ -1888,54 +1932,12 @@ let mayLoad = 1, Defs = [CC] in
let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in
def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>;
-// Supervisor call.
-let hasSideEffects = 1, isCall = 1, Defs = [CC] in
- def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>;
-
-// Monitor call.
-let hasSideEffects = 1, isCall = 1 in
- def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>;
-
-// Store clock.
-let hasSideEffects = 1, Defs = [CC] in {
- def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>;
- def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>;
- def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>;
-}
-
-// Store facility list.
-let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in
- def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>;
-
-// Extract CPU attribute.
-let hasSideEffects = 1 in
- def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>;
-
-// Extract CPU time.
-let Defs = [R0D, R1D], hasSideEffects = 1, mayLoad = 1 in
- def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>;
-
-// Extract PSW.
-let hasSideEffects = 1, Uses = [CC] in
- def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>;
-
// Execute.
let hasSideEffects = 1 in {
def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;
def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, GR64>;
}
-// Program return.
-let hasSideEffects = 1, Defs = [CC] in
- def PR : SideEffectInherentE<"pr", 0x0101>;
-
-// Move with key.
-let mayLoad = 1, mayStore = 1, Defs = [CC] in
- def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>;
-
-// Store real address.
-def STRAG : StoreSSE<"strag", 0xE502>;
-
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrSystem.td b/lib/Target/SystemZ/SystemZInstrSystem.td
new file mode 100644
index 000000000000..a9803c2d83e9
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrSystem.td
@@ -0,0 +1,517 @@
+//==- SystemZInstrSystem.td - SystemZ system instructions -*- tblgen-*-----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The instructions in this file implement SystemZ system-level instructions.
+// Most of these instructions are privileged or semi-privileged. They are
+// not used for code generation, but are provided for use with the assembler
+// and disassembler only.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Program-Status Word Instructions.
+//===----------------------------------------------------------------------===//
+
+// Extract PSW.
+let hasSideEffects = 1, Uses = [CC] in
+ def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>;
+
+// Load PSW (extended).
+let hasSideEffects = 1, Defs = [CC], mayLoad = 1 in {
+ def LPSW : SideEffectUnaryS<"lpsw", 0x8200, null_frag, 8>;
+ def LPSWE : SideEffectUnaryS<"lpswe", 0xB2B2, null_frag, 16>;
+}
+
+// Insert PSW key.
+let Uses = [R2L], Defs = [R2L] in
+ def IPK : SideEffectInherentS<"ipk", 0xB20B, null_frag>;
+
+// Set PSW key from address.
+let hasSideEffects = 1 in
+ def SPKA : SideEffectAddressS<"spka", 0xB20A, null_frag>;
+
+// Set system mask.
+let hasSideEffects = 1, mayLoad = 1 in
+ def SSM : SideEffectUnaryS<"ssm", 0x8000, null_frag, 1>;
+
+// Store then AND/OR system mask.
+let hasSideEffects = 1 in {
+ def STNSM : StoreSI<"stnsm", 0xAC, null_frag, imm32zx8>;
+ def STOSM : StoreSI<"stosm", 0xAD, null_frag, imm32zx8>;
+}
+
+// Insert address space control.
+let hasSideEffects = 1 in
+ def IAC : InherentRRE<"iac", 0xB224, GR32, null_frag>;
+
+// Set address space control (fast).
+let hasSideEffects = 1 in {
+ def SAC : SideEffectAddressS<"sac", 0xB219, null_frag>;
+ def SACF : SideEffectAddressS<"sacf", 0xB279, null_frag>;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Register Instructions.
+//===----------------------------------------------------------------------===//
+
+// Load control.
+def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>;
+def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>;
+
+// Store control.
+def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>;
+def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>;
+
+// Extract primary ASN (and instance).
+let hasSideEffects = 1 in {
+ def EPAR : InherentRRE<"epar", 0xB226, GR32, null_frag>;
+ def EPAIR : InherentRRE<"epair", 0xB99A, GR64, null_frag>;
+}
+
+// Extract secondary ASN (and instance).
+let hasSideEffects = 1 in {
+ def ESAR : InherentRRE<"esar", 0xB227, GR32, null_frag>;
+ def ESAIR : InherentRRE<"esair", 0xB99B, GR64, null_frag>;
+}
+
+// Set secondary ASN (and instance).
+let hasSideEffects = 1 in {
+ def SSAR : SideEffectUnaryRRE<"ssar", 0xB225, GR32, null_frag>;
+ def SSAIR : SideEffectUnaryRRE<"ssair", 0xB99F, GR64, null_frag>;
+}
+
+// Extract and set extended authority.
+let hasSideEffects = 1 in
+ def ESEA : UnaryTiedRRE<"esea", 0xB99D, GR32>;
+
+//===----------------------------------------------------------------------===//
+// Prefix-Register Instructions.
+//===----------------------------------------------------------------------===//
+
+// Set prefix.
+let hasSideEffects = 1 in
+ def SPX : SideEffectUnaryS<"spx", 0xB210, null_frag, 4>;
+
+// Store prefix.
+let hasSideEffects = 1 in
+ def STPX : StoreInherentS<"stpx", 0xB211, null_frag, 4>;
+
+//===----------------------------------------------------------------------===//
+// Storage-Key and Real Memory Instructions.
+//===----------------------------------------------------------------------===//
+
+// Insert storage key extended.
+let hasSideEffects = 1 in
+ def ISKE : BinaryRRE<"iske", 0xB229, null_frag, GR32, GR64>;
+
+// Insert virtual storage key.
+let hasSideEffects = 1 in
+ def IVSK : BinaryRRE<"ivsk", 0xB223, null_frag, GR32, GR64>;
+
+// Set storage key extended.
+let hasSideEffects = 1, Defs = [CC] in
+ defm SSKE : SideEffectTernaryRRFcOpt<"sske", 0xB22B, GR32, GR64>;
+
+// Reset reference bit extended.
+let hasSideEffects = 1, Defs = [CC] in
+ def RRBE : SideEffectBinaryRRE<"rrbe", 0xB22A, GR32, GR64>;
+
+// Reset reference bits multiple.
+let Predicates = [FeatureResetReferenceBitsMultiple], hasSideEffects = 1 in
+ def RRBM : UnaryRRE<"rrbm", 0xB9AE, null_frag, GR64, GR64>;
+
+// Perform frame management function.
+let hasSideEffects = 1 in
+ def PFMF : SideEffectBinaryMemRRE<"pfmf", 0xB9AF, GR32, GR64>;
+
+// Test block.
+let hasSideEffects = 1, mayStore = 1, Uses = [R0D], Defs = [R0D, CC] in
+ def TB : SideEffectBinaryRRE<"tb", 0xB22C, GR64, GR64>;
+
+// Page in / out.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in {
+ def PGIN : SideEffectBinaryRRE<"pgin", 0xB22E, GR64, GR64>;
+ def PGOUT : SideEffectBinaryRRE<"pgout", 0xB22F, GR64, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic-Address-Translation Instructions.
+//===----------------------------------------------------------------------===//
+
+// Invalidate page table entry.
+let hasSideEffects = 1 in
+ defm IPTE : SideEffectQuaternaryRRFaOptOpt<"ipte", 0xB221, GR64, GR32, GR32>;
+
+// Invalidate DAT table entry.
+let hasSideEffects = 1 in
+ defm IDTE : SideEffectQuaternaryRRFbOpt<"idte", 0xB98E, GR64, GR64, GR64>;
+
+// Compare and replace DAT table entry.
+let Predicates = [FeatureEnhancedDAT2], hasSideEffects = 1, Defs = [CC] in
+ defm CRDTE : SideEffectQuaternaryRRFbOpt<"crdte", 0xB98F, GR128, GR128, GR64>;
+
+// Purge TLB.
+let hasSideEffects = 1 in
+ def PTLB : SideEffectInherentS<"ptlb", 0xB20D, null_frag>;
+
+// Compare and swap and purge.
+let hasSideEffects = 1, Defs = [CC] in {
+ def CSP : CmpSwapRRE<"csp", 0xB250, GR128, GR64>;
+ def CSPG : CmpSwapRRE<"cspg", 0xB98A, GR128, GR64>;
+}
+
+// Load page-table-entry address.
+let hasSideEffects = 1, Defs = [CC] in
+ def LPTEA : TernaryRRFb<"lptea", 0xB9AA, GR64, GR64, GR64>;
+
+// Load real address.
+let hasSideEffects = 1, Defs = [CC] in {
+ defm LRA : LoadAddressRXPair<"lra", 0xB1, 0xE313, null_frag>;
+ def LRAG : LoadAddressRXY<"lrag", 0xE303, null_frag, laaddr20pair>;
+}
+
+// Store real address.
+def STRAG : StoreSSE<"strag", 0xE502>;
+
+// Load using real address.
+let mayLoad = 1 in {
+ def LURA : UnaryRRE<"lura", 0xB24B, null_frag, GR32, GR64>;
+ def LURAG : UnaryRRE<"lurag", 0xB905, null_frag, GR64, GR64>;
+}
+
+// Store using real address.
+let mayStore = 1 in {
+ def STURA : SideEffectBinaryRRE<"stura", 0xB246, GR32, GR64>;
+ def STURG : SideEffectBinaryRRE<"sturg", 0xB925, GR64, GR64>;
+}
+
+// Test protection.
+let hasSideEffects = 1, Defs = [CC] in
+ def TPROT : SideEffectBinarySSE<"tprot", 0xE501>;
+
+//===----------------------------------------------------------------------===//
+// Memory-move Instructions.
+//===----------------------------------------------------------------------===//
+
+// Move with key.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in
+ def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>;
+
+// Move to primary / secondary.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in {
+ def MVCP : MemoryBinarySSd<"mvcp", 0xDA, GR64>;
+ def MVCS : MemoryBinarySSd<"mvcs", 0xDB, GR64>;
+}
+
+// Move with source / destination key.
+let mayLoad = 1, mayStore = 1, Uses = [R0L, R1L] in {
+ def MVCSK : SideEffectBinarySSE<"mvcsk", 0xE50E>;
+ def MVCDK : SideEffectBinarySSE<"mvcdk", 0xE50F>;
+}
+
+// Move with optional specifications.
+let mayLoad = 1, mayStore = 1, Uses = [R0L] in
+ def MVCOS : SideEffectTernarySSF<"mvcos", 0xC80, GR64>;
+
+// Move page.
+let mayLoad = 1, mayStore = 1, Uses = [R0L], Defs = [CC] in
+ def MVPG : SideEffectBinaryRRE<"mvpg", 0xB254, GR64, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Address-Space Instructions.
+//===----------------------------------------------------------------------===//
+
+// Load address space parameters.
+let hasSideEffects = 1, Defs = [CC] in
+ def LASP : SideEffectBinarySSE<"lasp", 0xE500>;
+
+// Purge ALB.
+let hasSideEffects = 1 in
+ def PALB : SideEffectInherentRRE<"palb", 0xB248>;
+
+// Program call.
+let hasSideEffects = 1 in
+ def PC : SideEffectAddressS<"pc", 0xB218, null_frag>;
+
+// Program return.
+let hasSideEffects = 1, Defs = [CC] in
+ def PR : SideEffectInherentE<"pr", 0x0101>;
+
+// Program transfer (with instance).
+let hasSideEffects = 1 in {
+ def PT : SideEffectBinaryRRE<"pt", 0xB228, GR32, GR64>;
+ def PTI : SideEffectBinaryRRE<"pti", 0xB99E, GR64, GR64>;
+}
+
+// Resume program.
+let hasSideEffects = 1, Defs = [CC] in
+ def RP : SideEffectAddressS<"rp", 0xB277, null_frag>;
+
+// Branch in subspace group.
+let hasSideEffects = 1 in
+ def BSG : UnaryRRE<"bsg", 0xB258, null_frag, GR64, GR64>;
+
+// Branch and set authority.
+let hasSideEffects = 1 in
+ def BSA : UnaryRRE<"bsa", 0xB25A, null_frag, GR64, GR64>;
+
+// Test access.
+let Defs = [CC] in
+ def TAR : SideEffectBinaryRRE<"tar", 0xB24C, AR32, GR32>;
+
+//===----------------------------------------------------------------------===//
+// Linkage-Stack Instructions.
+//===----------------------------------------------------------------------===//
+
+// Branch and stack.
+let hasSideEffects = 1 in
+ def BAKR : SideEffectBinaryRRE<"bakr", 0xB240, GR64, GR64>;
+
+// Extract stacked registers.
+let hasSideEffects = 1 in {
+ def EREG : SideEffectBinaryRRE<"ereg", 0xB249, GR32, GR32>;
+ def EREGG : SideEffectBinaryRRE<"eregg", 0xB90E, GR64, GR64>;
+}
+
+// Extract stacked state.
+let hasSideEffects = 1, Defs = [CC] in
+ def ESTA : UnaryRRE<"esta", 0xB24A, null_frag, GR128, GR32>;
+
+// Modify stacked state.
+let hasSideEffects = 1 in
+ def MSTA : SideEffectUnaryRRE<"msta", 0xB247, GR128, null_frag>;
+
+//===----------------------------------------------------------------------===//
+// Time-Related Instructions.
+//===----------------------------------------------------------------------===//
+
+// Perform timing facility function.
+let hasSideEffects = 1, mayLoad = 1, Uses = [R0L, R1D], Defs = [CC] in
+ def PTFF : SideEffectInherentE<"ptff", 0x0104>;
+
+// Set clock.
+let hasSideEffects = 1, Defs = [CC] in
+ def SCK : SideEffectUnaryS<"sck", 0xB204, null_frag, 8>;
+
+// Set clock programmable field.
+let hasSideEffects = 1, Uses = [R0L] in
+ def SCKPF : SideEffectInherentE<"sckpf", 0x0107>;
+
+// Set clock comparator.
+let hasSideEffects = 1 in
+ def SCKC : SideEffectUnaryS<"sckc", 0xB206, null_frag, 8>;
+
+// Set CPU timer.
+let hasSideEffects = 1 in
+ def SPT : SideEffectUnaryS<"spt", 0xB208, null_frag, 8>;
+
+// Store clock (fast / extended).
+let hasSideEffects = 1, Defs = [CC] in {
+ def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>;
+ def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>;
+ def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>;
+}
+
+// Store clock comparator.
+let hasSideEffects = 1 in
+ def STCKC : StoreInherentS<"stckc", 0xB207, null_frag, 8>;
+
+// Store CPU timer.
+let hasSideEffects = 1 in
+ def STPT : StoreInherentS<"stpt", 0xB209, null_frag, 8>;
+
+//===----------------------------------------------------------------------===//
+// CPU-Related Instructions.
+//===----------------------------------------------------------------------===//
+
+// Store CPU address.
+let hasSideEffects = 1 in
+ def STAP : StoreInherentS<"stap", 0xB212, null_frag, 2>;
+
+// Store CPU ID.
+let hasSideEffects = 1 in
+ def STIDP : StoreInherentS<"stidp", 0xB202, null_frag, 8>;
+
+// Store system information.
+let hasSideEffects = 1, Uses = [R0L, R1L], Defs = [R0L, CC] in
+ def STSI : StoreInherentS<"stsi", 0xB27D, null_frag, 0>;
+
+// Store facility list.
+let hasSideEffects = 1 in
+ def STFL : StoreInherentS<"stfl", 0xB2B1, null_frag, 4>;
+
+// Store facility list extended.
+let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in
+ def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>;
+
+// Extract CPU attribute.
+let hasSideEffects = 1 in
+ def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>;
+
+// Extract CPU time.
+let hasSideEffects = 1, mayLoad = 1, Defs = [R0D, R1D] in
+ def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>;
+
+// Perform topology function.
+let hasSideEffects = 1 in
+ def PTF : UnaryTiedRRE<"ptf", 0xB9A2, GR64>;
+
+// Perform cryptographic key management operation.
+let Predicates = [FeatureMessageSecurityAssist3],
+ hasSideEffects = 1, Uses = [R0L, R1D] in
+ def PCKMO : SideEffectInherentRRE<"pckmo", 0xB928>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Supervisor call.
+let hasSideEffects = 1, isCall = 1, Defs = [CC] in
+ def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>;
+
+// Monitor call.
+let hasSideEffects = 1, isCall = 1 in
+ def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>;
+
+// Diagnose.
+let hasSideEffects = 1, isCall = 1 in
+ def DIAG : SideEffectTernaryRS<"diag", 0x83, GR32, GR32>;
+
+// Trace.
+let hasSideEffects = 1, mayLoad = 1 in {
+ def TRACE : SideEffectTernaryRS<"trace", 0x99, GR32, GR32>;
+ def TRACG : SideEffectTernaryRSY<"tracg", 0xEB0F, GR64, GR64>;
+}
+
+// Trap.
+let hasSideEffects = 1 in {
+ def TRAP2 : SideEffectInherentE<"trap2", 0x01FF>;
+ def TRAP4 : SideEffectAddressS<"trap4", 0xB2FF, null_frag>;
+}
+
+// Signal processor.
+let hasSideEffects = 1, Defs = [CC] in
+ def SIGP : SideEffectTernaryRS<"sigp", 0xAE, GR64, GR64>;
+
+// Signal adapter.
+let hasSideEffects = 1, Uses = [R0D, R1D, R2D, R3D], Defs = [CC] in
+ def SIGA : SideEffectAddressS<"siga", 0xB274, null_frag>;
+
+// Start interpretive execution.
+let hasSideEffects = 1, Defs = [CC] in
+ def SIE : SideEffectUnaryS<"sie", 0xB214, null_frag, 0>;
+
+//===----------------------------------------------------------------------===//
+// CPU-Measurement Facility Instructions (SA23-2260).
+//===----------------------------------------------------------------------===//
+
+// Load program parameter
+let hasSideEffects = 1 in
+ def LPP : SideEffectUnaryS<"lpp", 0xB280, null_frag, 8>;
+
+// Extract coprocessor-group address.
+let hasSideEffects = 1, Defs = [CC] in
+ def ECPGA : UnaryRRE<"ecpga", 0xB2ED, null_frag, GR32, GR64>;
+
+// Extract CPU counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def ECCTR : UnaryRRE<"ecctr", 0xB2E4, null_frag, GR64, GR64>;
+
+// Extract peripheral counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def EPCTR : UnaryRRE<"epctr", 0xB2E5, null_frag, GR64, GR64>;
+
+// Load CPU-counter-set controls.
+let hasSideEffects = 1, Defs = [CC] in
+ def LCCTL : SideEffectUnaryS<"lcctl", 0xB284, null_frag, 8>;
+
+// Load peripheral-counter-set controls.
+let hasSideEffects = 1, Defs = [CC] in
+ def LPCTL : SideEffectUnaryS<"lpctl", 0xB285, null_frag, 8>;
+
+// Load sampling controls.
+let hasSideEffects = 1, Defs = [CC] in
+ def LSCTL : SideEffectUnaryS<"lsctl", 0xB287, null_frag, 0>;
+
+// Query sampling information.
+let hasSideEffects = 1 in
+ def QSI : StoreInherentS<"qsi", 0xB286, null_frag, 0>;
+
+// Query counter information.
+let hasSideEffects = 1 in
+ def QCTRI : StoreInherentS<"qctri", 0xB28E, null_frag, 0>;
+
+// Set CPU counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def SCCTR : SideEffectBinaryRRE<"scctr", 0xB2E0, GR64, GR64>;
+
+// Set peripheral counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def SPCTR : SideEffectBinaryRRE<"spctr", 0xB2E1, GR64, GR64>;
+
+//===----------------------------------------------------------------------===//
+// I/O Instructions (Principles of Operation, Chapter 14).
+//===----------------------------------------------------------------------===//
+
+// Clear subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def CSCH : SideEffectInherentS<"csch", 0xB230, null_frag>;
+
+// Halt subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def HSCH : SideEffectInherentS<"hsch", 0xB231, null_frag>;
+
+// Modify subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def MSCH : SideEffectUnaryS<"msch", 0xB232, null_frag, 0>;
+
+// Resume subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def RSCH : SideEffectInherentS<"rsch", 0xB238, null_frag>;
+
+// Start subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def SSCH : SideEffectUnaryS<"ssch", 0xB233, null_frag, 0>;
+
+// Store subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def STSCH : StoreInherentS<"stsch", 0xB234, null_frag, 0>;
+
+// Test subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def TSCH : StoreInherentS<"tsch", 0xB235, null_frag, 0>;
+
+// Cancel subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def XSCH : SideEffectInherentS<"xsch", 0xB276, null_frag>;
+
+// Reset channel path.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def RCHP : SideEffectInherentS<"rchp", 0xB23B, null_frag>;
+
+// Set channel monitor.
+let hasSideEffects = 1, mayLoad = 1, Uses = [R1L, R2D] in
+ def SCHM : SideEffectInherentS<"schm", 0xB23C, null_frag>;
+
+// Store channel path status.
+let hasSideEffects = 1 in
+ def STCPS : StoreInherentS<"stcps", 0xB23A, null_frag, 0>;
+
+// Store channel report word.
+let hasSideEffects = 1, Defs = [CC] in
+ def STCRW : StoreInherentS<"stcrw", 0xB239, null_frag, 0>;
+
+// Test pending interruption.
+let hasSideEffects = 1, Defs = [CC] in
+ def TPI : StoreInherentS<"tpi", 0xB236, null_frag, 0>;
+
+// Set address limit.
+let hasSideEffects = 1, Uses = [R1L] in
+ def SAL : SideEffectInherentS<"sal", 0xB237, null_frag>;
+
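Two conventions run through the new file: null_frag attaches no SelectionDAG pattern, so the instructions are reachable only from the assembler and disassembler, and implicit register operands are modelled with Uses/Defs lists rather than explicit ins/outs. A minimal sketch of that shape, using an invented mnemonic and opcode purely for illustration:

  // Hypothetical assembler-only definition (mnemonic and opcode made up):
  // no DAG pattern (null_frag), implicitly reads the low half of r1
  // and clobbers the condition code.
  let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
    def XYZ : SideEffectInherentS<"xyz", 0xB2FE, null_frag>;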
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 47d2f75cc11a..36809ea81dc1 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -304,3 +304,13 @@ foreach I = 0-15 in {
defm AR32 : SystemZRegClass<"AR32", [i32], 32,
(add (sequence "A%u", 0, 15)), 0>;
+// Control registers.
+class CREG64<bits<16> num, string n> : SystemZReg<n> {
+ let HWEncoding = num;
+}
+foreach I = 0-15 in {
+ def C#I : CREG64<I, "c"#I>, DwarfRegNum<[!add(I, 32)]>;
+}
+defm CR64 : SystemZRegClass<"CR64", [i64], 64,
+ (add (sequence "C%u", 0, 15)), 0>;
+
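The foreach above stamps out one record per control register, with !add(I, 32) assigning DWARF numbers 32 through 47; the CR64 register class then lets the LCTL/LCTLG and STCTL/STCTG operands name them. Unrolled, the first and last iterations look roughly like this (illustrative expansion only; the actual records come from the loop):

  def C0  : CREG64<0,  "c0">,  DwarfRegNum<[32]>;
  ...
  def C15 : CREG64<15, "c15">, DwarfRegNum<[47]>;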
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index 5f5f2f690e58..adc9f2976f87 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -353,6 +353,9 @@ def : InstRW<[FXa], (instregex "ALGF(I|R)$")>;
def : InstRW<[FXa], (instregex "ALGR(K)?$")>;
def : InstRW<[FXa], (instregex "ALR(K)?$")>;
def : InstRW<[FXa], (instregex "AR(K)?$")>;
+def : InstRW<[FXa], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXa, Lat2], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXa], (instregex "ALSIH(N)?$")>;
def : InstRW<[FXb, LSU, Lat5], (instregex "A(L)?(G)?SI$")>;
// Logical addition with carry
@@ -376,6 +379,8 @@ def : InstRW<[FXa], (instregex "SLGF(I|R)$")>;
def : InstRW<[FXa], (instregex "SLGR(K)?$")>;
def : InstRW<[FXa], (instregex "SLR(K)?$")>;
def : InstRW<[FXa], (instregex "SR(K)?$")>;
+def : InstRW<[FXa], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXa, Lat2], (instregex "S(L)?HHLR$")>;
// Subtraction with borrow
def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "SLB(G)?$")>;
@@ -506,6 +511,8 @@ def : InstRW<[FXb], (instregex "CLIH$")>;
def : InstRW<[FXb, LSU, Lat5], (instregex "CLI(Y)?$")>;
def : InstRW<[FXb], (instregex "CLR$")>;
def : InstRW<[FXb, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXb], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXb, Lat2], (instregex "C(L)?HLR$")>;
// Compare halfword
def : InstRW<[FXb, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
@@ -701,38 +708,9 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
-// Move with key
-def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
-
-// Monitor call
-def : InstRW<[FXb], (instregex "MC$")>;
-
-// Extract CPU attribute
-def : InstRW<[FXb, Lat30], (instregex "ECAG$")>;
-
-// Extract CPU Time
-def : InstRW<[FXa, Lat5, LSU], (instregex "ECTG$")>;
-
-// Extract PSW
-def : InstRW<[FXb, Lat30], (instregex "EPSW$")>;
-
// Execute
def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>;
-// Program return
-def : InstRW<[FXb, Lat30], (instregex "PR$")>;
-
-// Inline assembly
-def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone],
- (instregex "STCK(F)?$")>;
-def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone],
- (instregex "STCKE$")>;
-def : InstRW<[FXa, LSU, Lat5], (instregex "STFLE$")>;
-def : InstRW<[FXb, Lat30], (instregex "SVC$")>;
-
-// Store real address
-def : InstRW<[FXb, LSU, Lat5], (instregex "STRAG$")>;
-
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
@@ -1364,5 +1342,162 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRC(B|F|H)S$")>;
def : InstRW<[VecStr], (instregex "VSTRCZ(B|F|H)$")>;
def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>;
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXa, Lat3], (instregex "IPK$")>;
+def : InstRW<[LSU], (instregex "SPKA$")>;
+def : InstRW<[LSU], (instregex "SSM$")>;
+def : InstRW<[FXb], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXa, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXb, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXb, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXb, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXb, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXb, Lat30], (instregex "TB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXb, Lat30], (instregex "PR$")>;
+def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXb, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXb, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[LSU, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone],
+ (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone],
+ (instregex "STCKE$")>;
+def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[LSU, LSU, FXb, Lat3], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXb], (instregex "MC$")>;
+def : InstRW<[FXb, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXb], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXb, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "LPP$")>;
+def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXb, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXb, Lat30], (instregex "SAL$")>;
+
}
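The scheduling entries above use LLVM's generic InstRW hook: each def binds a list of SchedReadWrite resources (issue units plus an optional latency override such as Lat30) to every opcode whose name matches the regular expression, so one line can also cover the Opt variants produced by the multiclasses. For example, this entry from the z13 model covers both records created by the SSKE defm (a sketch of intent, not new functionality):

  // SSKE and SSKEOpt both match the regex; each occupies the FXb unit
  // with a coarse 30-cycle latency estimate.
  def : InstRW<[FXb, Lat30], (instregex "SSKE(Opt)?$")>;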
diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td
index 126eac2e2072..128049a09086 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -310,6 +310,9 @@ def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
def : InstRW<[FXU], (instregex "ALR(K)?$")>;
def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXU, FXU, Lat3], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXU], (instregex "ALSIH(N)?$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>;
// Logical addition with carry
@@ -333,6 +336,8 @@ def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
def : InstRW<[FXU], (instregex "SLR(K)?$")>;
def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXU, FXU, Lat3], (instregex "S(L)?HHLR$")>;
// Subtraction with borrow
def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
@@ -468,6 +473,8 @@ def : InstRW<[FXU], (instregex "CLIH$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
def : InstRW<[FXU], (instregex "CLR$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXU], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXU, FXU, Lat3], (instregex "C(L)?HLR$")>;
// Compare halfword
def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CH(Y|RL)?$")>;
@@ -634,37 +641,9 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
-// Move with key
-def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
-
-// Monitor call
-def : InstRW<[FXU], (instregex "MC$")>;
-
-// Extract CPU attribute
-def : InstRW<[FXU, Lat30], (instregex "ECAG$")>;
-
-// Extract CPU Time
-def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>;
-
-// Extract PSW
-def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
-
// Execute
def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
-// Program return
-def : InstRW<[FXU, Lat30], (instregex "PR$")>;
-
-// Inline assembly
-def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>;
-def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>;
-def : InstRW<[LSU, FXU, Lat5], (instregex "STCKE$")>;
-def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>;
-def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
-
-// Store real address
-def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>;
-
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
@@ -1058,5 +1037,160 @@ def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXU, Lat3], (instregex "IPK$")>;
+def : InstRW<[LSU], (instregex "SPKA$")>;
+def : InstRW<[LSU], (instregex "SSM$")>;
+def : InstRW<[FXU], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXU, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXU, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXU, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXU, Lat30], (instregex "TB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[LSU, Lat30], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXU, Lat30], (instregex "PR$")>;
+def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXU, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>;
+def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>;
+def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STCKE$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXU], (instregex "MC$")>;
+def : InstRW<[FXU, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXU], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXU, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LPP$")>;
+def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXU, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXU, Lat30], (instregex "SAL$")>;
+
}
diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td
index d38ca64d2e9b..76b378454631 100644
--- a/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -320,6 +320,9 @@ def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
def : InstRW<[FXU], (instregex "ALR(K)?$")>;
def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXU], (instregex "ALSIH(N)?$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>;
// Logical addition with carry
@@ -343,6 +346,8 @@ def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
def : InstRW<[FXU], (instregex "SLR(K)?$")>;
def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "S(L)?HHLR$")>;
// Subtraction with borrow
def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
@@ -478,6 +483,8 @@ def : InstRW<[FXU], (instregex "CLIH$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
def : InstRW<[FXU], (instregex "CLR$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXU], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "C(L)?HLR$")>;
// Compare halfword
def : InstRW<[FXU, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
@@ -672,37 +679,9 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
-// Move with key
-def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
-
-// Monitor call
-def : InstRW<[FXU], (instregex "MC$")>;
-
-// Extract CPU attribute
-def : InstRW<[FXU, Lat30], (instregex "ECAG$")>;
-
-// Extract CPU Time
-def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>;
-
-// Extract PSW
-def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
-
// Execute
def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
-// Program return
-def : InstRW<[FXU, Lat30], (instregex "PR$")>;
-
-// Inline assembly
-def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone],
- (instregex "STCKE$")>;
-def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>;
-def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
-
-// Store real address
-def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>;
-
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
@@ -1102,5 +1081,161 @@ def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXU, Lat3], (instregex "IPK$")>;
+def : InstRW<[LSU], (instregex "SPKA$")>;
+def : InstRW<[LSU], (instregex "SSM$")>;
+def : InstRW<[FXU], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXU, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXU, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXU, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXU, Lat30], (instregex "TB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[LSU, Lat30], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXU, Lat30], (instregex "PR$")>;
+def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXU, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>;
+def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone],
+ (instregex "STCKE$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXU], (instregex "MC$")>;
+def : InstRW<[FXU, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXU], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXU, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LPP$")>;
+def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXU, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXU, Lat30], (instregex "SAL$")>;
+
}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 0ab0c2f25915..eb4a0962f7eb 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -37,12 +37,13 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
const TargetMachine &TM)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasPopulationCount(false), HasMessageSecurityAssist4(false),
+ HasPopulationCount(false), HasMessageSecurityAssist3(false),
+ HasMessageSecurityAssist4(false), HasResetReferenceBitsMultiple(false),
HasFastSerialization(false), HasInterlockedAccess1(false),
HasMiscellaneousExtensions(false),
HasExecutionHint(false), HasLoadAndTrap(false),
HasTransactionalExecution(false), HasProcessorAssist(false),
- HasDFPZonedConversion(false),
+ HasDFPZonedConversion(false), HasEnhancedDAT2(false),
HasVector(false), HasLoadStoreOnCond2(false),
HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false),
HasDFPPackedConversion(false),
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index be480f03c572..b05a1bb6cafd 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -39,7 +39,9 @@ protected:
bool HasHighWord;
bool HasFPExtension;
bool HasPopulationCount;
+ bool HasMessageSecurityAssist3;
bool HasMessageSecurityAssist4;
+ bool HasResetReferenceBitsMultiple;
bool HasFastSerialization;
bool HasInterlockedAccess1;
bool HasMiscellaneousExtensions;
@@ -48,6 +50,7 @@ protected:
bool HasTransactionalExecution;
bool HasProcessorAssist;
bool HasDFPZonedConversion;
+ bool HasEnhancedDAT2;
bool HasVector;
bool HasLoadStoreOnCond2;
bool HasLoadAndZeroRightmostByte;
@@ -109,9 +112,18 @@ public:
bool hasPopulationCount() const { return HasPopulationCount; }
// Return true if the target has the message-security-assist
+ // extension facility 3.
+ bool hasMessageSecurityAssist3() const { return HasMessageSecurityAssist3; }
+
+ // Return true if the target has the message-security-assist
// extension facility 4.
bool hasMessageSecurityAssist4() const { return HasMessageSecurityAssist4; }
+ // Return true if the target has the reset-reference-bits-multiple facility.
+ bool hasResetReferenceBitsMultiple() const {
+ return HasResetReferenceBitsMultiple;
+ }
+
// Return true if the target has the fast-serialization facility.
bool hasFastSerialization() const { return HasFastSerialization; }
@@ -138,6 +150,9 @@ public:
// Return true if the target has the DFP zoned-conversion facility.
bool hasDFPZonedConversion() const { return HasDFPZonedConversion; }
+ // Return true if the target has the enhanced-DAT facility 2.
+ bool hasEnhancedDAT2() const { return HasEnhancedDAT2; }
+
// Return true if the target has the load-and-zero-rightmost-byte facility.
bool hasLoadAndZeroRightmostByte() const {
return HasLoadAndZeroRightmostByte;
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 422c16b8eb62..ce5c57e0f519 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -238,7 +238,7 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
+void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Find out if L contains a call, what the machine instruction count
// estimate is, and how many stores there are.
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index bdba7601eb78..6923fc6fc910 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -45,7 +45,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
/// @}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 39cb1ca336f2..129794171464 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -57,17 +57,19 @@ def BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
}
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
-// Placemarkers to indicate the start or end of a block or loop scope. These
-// use/clobber VALUE_STACK to prevent them from being moved into the middle of
-// an expression tree.
+// Placemarkers to indicate the start or end of a block, loop, or try scope.
+// These use/clobber VALUE_STACK to prevent them from being moved into the
+// middle of an expression tree.
let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
def BLOCK : I<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
def LOOP : I<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
+def TRY : I<(outs), (ins Signature:$sig), [], "try \t$sig", 0x06>;
-// END_BLOCK, END_LOOP, and END_FUNCTION are represented with the same opcode
-// in wasm.
+// END_BLOCK, END_LOOP, END_TRY, and END_FUNCTION are represented with the same
+// opcode in wasm.
def END_BLOCK : I<(outs), (ins), [], "end_block", 0x0b>;
def END_LOOP : I<(outs), (ins), [], "end_loop", 0x0b>;
+def END_TRY : I<(outs), (ins), [], "end_try", 0x0b>;
let isTerminator = 1, isBarrier = 1 in
def END_FUNCTION : I<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
@@ -112,6 +114,20 @@ let isReturn = 1 in {
def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable", 0x00>;
+def THROW_I32 : I<(outs), (ins i32imm:$tag, I32:$obj),
+ [(int_wasm_throw imm:$tag, I32:$obj)], "throw \t$tag, $obj",
+ 0x08>;
+def THROW_I64 : I<(outs), (ins i32imm:$tag, I64:$obj),
+ [(int_wasm_throw imm:$tag, I64:$obj)], "throw \t$tag, $obj",
+ 0x08>;
+def RETHROW : I<(outs), (ins i32imm:$rel_depth), [], "rethrow \t$rel_depth",
+ 0x09>;
+
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
} // Defs = [ARGUMENTS]
+
+// rethrow takes a relative depth as an argument; for C++, only a depth of 0
+// is currently possible. Once other languages need nonzero depths, they will
+// be computed in CFGStackify.
+def : Pat<(int_wasm_rethrow), (RETHROW 0)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 947c0329bb6e..f0b6a3e35dba 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -897,7 +897,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
}
}
- // Look for orphan landingpads, can occur in blocks with no predecesors
+ // Look for orphan landingpads, can occur in blocks with no predecessors
for (BasicBlock &BB : F) {
Instruction *I = BB.getFirstNonPHI();
if (auto *LPI = dyn_cast<LandingPadInst>(I))
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d30cc724c203..825f23dc52d9 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -49,8 +49,11 @@ static const char OpPrecedence[] = {
4, // IC_MINUS
5, // IC_MULTIPLY
5, // IC_DIVIDE
- 6, // IC_RPAREN
- 7, // IC_LPAREN
+ 5, // IC_MOD
+ 6, // IC_NOT
+ 7, // IC_NEG
+ 8, // IC_RPAREN
+ 9, // IC_LPAREN
0, // IC_IMM
0 // IC_REGISTER
};
@@ -92,6 +95,9 @@ private:
IC_MINUS,
IC_MULTIPLY,
IC_DIVIDE,
+ IC_MOD,
+ IC_NOT,
+ IC_NEG,
IC_RPAREN,
IC_LPAREN,
IC_IMM,
@@ -111,6 +117,10 @@ private:
SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
SmallVector<ICToken, 4> PostfixStack;
+ bool isUnaryOperator(const InfixCalculatorTok Op) {
+ return Op == IC_NEG || Op == IC_NOT;
+ }
+
public:
int64_t popOperand() {
assert (!PostfixStack.empty() && "Poped an empty stack!");
@@ -192,6 +202,22 @@ private:
ICToken Op = PostfixStack[i];
if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
OperandStack.push_back(Op);
+ } else if (isUnaryOperator(Op.first)) {
+ assert (OperandStack.size() > 0 && "Too few operands.");
+ ICToken Operand = OperandStack.pop_back_val();
+ assert (Operand.first == IC_IMM &&
+ "Unary operation with a register!");
+ switch (Op.first) {
+ default:
+ report_fatal_error("Unexpected operator!");
+ break;
+ case IC_NEG:
+ OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
+ break;
+ case IC_NOT:
+ OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
+ break;
+ }
} else {
assert (OperandStack.size() > 1 && "Too few operands.");
int64_t Val;
@@ -222,6 +248,12 @@ private:
Val = Op1.second / Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
+ case IC_MOD:
+ assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Modulo operation with an immediate and a register!");
+ Val = Op1.second % Op2.second;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
case IC_OR:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Or operation with an immediate and a register!");
@@ -271,6 +303,7 @@ private:
IES_NOT,
IES_MULTIPLY,
IES_DIVIDE,
+ IES_MOD,
IES_LBRAC,
IES_RBRAC,
IES_LPAREN,
@@ -421,10 +454,16 @@ private:
default:
State = IES_ERROR;
break;
+ case IES_OR:
+ case IES_XOR:
+ case IES_AND:
+ case IES_LSHIFT:
+ case IES_RSHIFT:
case IES_PLUS:
case IES_NOT:
case IES_MULTIPLY:
case IES_DIVIDE:
+ case IES_MOD:
case IES_LPAREN:
case IES_RPAREN:
case IES_LBRAC:
@@ -432,11 +471,12 @@ private:
case IES_INTEGER:
case IES_REGISTER:
State = IES_MINUS;
- // Only push the minus operator if it is not a unary operator.
- if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
- CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
- CurrState == IES_LPAREN || CurrState == IES_LBRAC))
+      // Push the minus operator only if this is a binary minus; a unary
+      // minus becomes a negate operator instead.
+ if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
+ CurrState == IES_INTEGER || CurrState == IES_RBRAC)
IC.pushOperator(IC_MINUS);
+ else
+ IC.pushOperator(IC_NEG);
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
// If we already have a BaseReg, then assume this is the IndexReg with
// a scale of 1.
@@ -458,9 +498,21 @@ private:
default:
State = IES_ERROR;
break;
+ case IES_OR:
+ case IES_XOR:
+ case IES_AND:
+ case IES_LSHIFT:
+ case IES_RSHIFT:
case IES_PLUS:
+ case IES_MINUS:
case IES_NOT:
+ case IES_MULTIPLY:
+ case IES_DIVIDE:
+ case IES_MOD:
+ case IES_LPAREN:
+ case IES_LBRAC:
State = IES_NOT;
+ IC.pushOperator(IC_NOT);
break;
}
PrevState = CurrState;
@@ -525,6 +577,7 @@ private:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_DIVIDE:
+ case IES_MOD:
case IES_MULTIPLY:
case IES_LPAREN:
State = IES_INTEGER;
@@ -539,26 +592,6 @@ private:
}
// Get the scale and replace the 'Register * Scale' with '0'.
IC.popOperator();
- } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
- PrevState == IES_OR || PrevState == IES_AND ||
- PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
- PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT || PrevState == IES_XOR) &&
- CurrState == IES_MINUS) {
- // Unary minus. No need to pop the minus operand because it was never
- // pushed.
- IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
- } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
- PrevState == IES_OR || PrevState == IES_AND ||
- PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
- PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT || PrevState == IES_XOR) &&
- CurrState == IES_NOT) {
- // Unary not. No need to pop the not operand because it was never
- // pushed.
- IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
} else {
IC.pushOperand(IC_IMM, TmpInt);
}
@@ -594,6 +627,19 @@ private:
break;
}
}
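+  // Handle the Intel-syntax MOD operator. It is only valid after a complete
+  // operand, i.e. an integer or a closing parenthesis.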
+ void onMod() {
+ PrevState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ State = IES_MOD;
+ IC.pushOperator(IC_MOD);
+ break;
+ }
+ }
void onLBrac() {
PrevState = State;
switch (State) {
@@ -647,18 +693,8 @@ private:
case IES_RSHIFT:
case IES_MULTIPLY:
case IES_DIVIDE:
+ case IES_MOD:
case IES_LPAREN:
- // FIXME: We don't handle this type of unary minus or not, yet.
- if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
- PrevState == IES_OR || PrevState == IES_AND ||
- PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
- PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT || PrevState == IES_XOR) &&
- (CurrState == IES_MINUS || CurrState == IES_NOT)) {
- State = IES_ERROR;
- break;
- }
State = IES_LPAREN;
IC.pushOperator(IC_LPAREN);
break;
@@ -1302,6 +1338,8 @@ bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine
SM.onXor();
else if (Name.equals_lower("and"))
SM.onAnd();
+ else if (Name.equals_lower("mod"))
+ SM.onMod();
else
return false;
return true;
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index caf98bffb80d..8f2017e990c5 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -396,7 +396,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
if (!SB->getFragment()) {
Asm.getContext().reportError(
Fixup.getLoc(),
- "symbol '" + B->getSymbol().getName() +
+ "symbol '" + SB->getName() +
"' can not be undefined in a subtraction expression");
return false;
}
@@ -408,7 +408,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
// pedantic compatibility with 'as'.
Type = A->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF
: (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF;
- Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
+ Value2 = Writer->getSymbolAddress(*SB, Layout);
FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
}
@@ -468,8 +468,8 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue) {
- assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
- !is64Bit() &&
+ const MCSymbolRefExpr *SymA = Target.getSymA();
+ assert(SymA->getKind() == MCSymbolRefExpr::VK_TLVP && !is64Bit() &&
"Should only be called with a 32-bit TLVP relocation!");
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
@@ -480,15 +480,14 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer,
// subtraction from the picbase. For 32-bit pic the addend is the difference
// between the picbase and the next address. For 32-bit static the addend is
// zero.
- if (Target.getSymB()) {
+ if (auto *SymB = Target.getSymB()) {
// If this is a subtraction then we're pcrel.
uint32_t FixupAddress =
Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
IsPCRel = 1;
- FixedValue =
- FixupAddress -
- Writer->getSymbolAddress(Target.getSymB()->getSymbol(), Layout) +
- Target.getConstant();
+ FixedValue = FixupAddress -
+ Writer->getSymbolAddress(SymB->getSymbol(), Layout) +
+ Target.getConstant();
FixedValue += 1ULL << Log2Size;
} else {
FixedValue = 0;
@@ -499,8 +498,7 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer,
MRE.r_word0 = Value;
MRE.r_word1 =
(IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28);
- Writer->addRelocation(&Target.getSymA()->getSymbol(), Fragment->getParent(),
- MRE);
+ Writer->addRelocation(&SymA->getSymbol(), Fragment->getParent(), MRE);
}
void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 5892f1de33ee..807f7a6ddb19 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -44,7 +44,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
const MCAsmBackend &MAB) const {
unsigned FixupKind = Fixup.getKind();
if (IsCrossSection) {
- if (FixupKind != FK_Data_4) {
+ if (FixupKind != FK_Data_4 && FixupKind != llvm::X86::reloc_signed_4byte) {
Ctx.reportError(Fixup.getLoc(), "Cannot represent this expression");
return COFF::IMAGE_REL_AMD64_ADDR32;
}
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index fe105298f5c1..7437ebacfac3 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -300,6 +300,8 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
"Intel Atom processors">;
def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
"Intel Silvermont processors">;
+def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
+ "Intel Goldmont processors">;
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
@@ -430,6 +432,34 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
+class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [
+ ProcIntelGLM,
+ FeatureX87,
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeaturePOPCNT,
+ FeaturePCLMUL,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeatureCallRegIndirect,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF,
+ FeatureMPX,
+ FeatureSHA,
+ FeatureRDSEED,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureXSAVEC,
+ FeatureXSAVES,
+ FeatureCLFLUSHOPT
+]>;
+def : GoldmontProc<"goldmont">;
+
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f777e5628988..b89914f8893e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5065,6 +5065,20 @@ static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
+// Return true if the instruction zeroes the unused upper part of the
+// destination and accepts a mask.
+static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case X86ISD::PCMPEQM:
+ case X86ISD::PCMPGTM:
+ case X86ISD::CMPM:
+ case X86ISD::CMPMU:
+ return true;
+ }
+}
+
/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -5097,6 +5111,22 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// 3. Subvector should be inserted in the middle (for example v2i1
// to v16i1, index 2)
+  // If this node widens - by concatenating zeroes - the result of a node
+  // whose instruction already zeroes all upper (irrelevant) bits of the
+  // output register, mark this node as legal so that instruction selection
+  // can replace it with the v8i1 version of the previous instruction.
+  // For example, the VPCMPEQDZ128rr instruction stores its v4i1 result in a
+  // k-reg while zeroing the remaining upper 60 bits of the register. If the
+  // result of such an instruction is inserted into an all-zeros vector, we
+  // can safely remove the insert_vector (in instruction selection) because
+  // the cmp instruction already zeroed the rest of the register.
+ if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 &&
+ (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) ||
+ (SubVec.getOpcode() == ISD::AND &&
+ (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) ||
+ isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode())))))
+ return Op;
+
// extend to natively supported kshift
MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
MVT WideOpVT = OpVT;
@@ -7919,6 +7949,60 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
+// Return true if all the operands of the given CONCAT_VECTORS node are zeros
+// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0)
+static bool isExpandWithZeros(const SDValue &Op) {
+ assert(Op.getOpcode() == ISD::CONCAT_VECTORS &&
+ "Expand with zeros only possible in CONCAT_VECTORS nodes!");
+
+ for (unsigned i = 1; i < Op.getNumOperands(); i++)
+ if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode()))
+ return false;
+
+ return true;
+}
+
+// If the given node is a type promotion (by concatenating i1 zeros) of the
+// result of a node that already zeros all upper bits of the k-register,
+// return the node being promoted; otherwise return an empty SDValue.
+static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) {
+ unsigned Opc = Op.getOpcode();
+
+ assert(Opc == ISD::CONCAT_VECTORS &&
+ Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
+ "Unexpected node to check for type promotion!");
+
+  // As long as we are concatenating zeros to the upper part of a previous
+  // node's result, climb up the tree until a node with a different opcode
+  // is encountered.
+ while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) {
+ if (Opc == ISD::INSERT_SUBVECTOR) {
+ if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) &&
+ Op.getConstantOperandVal(2) == 0)
+ Op = Op.getOperand(1);
+ else
+ return SDValue();
+ } else { // Opc == ISD::CONCAT_VECTORS
+ if (isExpandWithZeros(Op))
+ Op = Op.getOperand(0);
+ else
+ return SDValue();
+ }
+ Opc = Op.getOpcode();
+ }
+
+  // Check if the first inserted node zeroes the upper bits, or is the 'and'
+  // (masked version) of a node that zeros the upper bits.
+ if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) ||
+ (Op.getOpcode() == ISD::AND &&
+ (isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) ||
+ isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) {
+ return Op;
+ }
+
+ return SDValue();
+}
+
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG & DAG) {
@@ -7929,6 +8013,17 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
assert(isPowerOf2_32(NumOfOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
+  // If this node promotes - by concatenating zeroes - the result of a node
+  // whose instruction already zeroes all upper (irrelevant) bits of the output
+  // register, mark it as legal and catch the pattern in instruction selection
+  // to avoid emitting extra instructions (for zeroing the upper bits).
+ if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) {
+ SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64);
+ SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted,
+ ZeroC);
+ }
+
SDValue Undef = DAG.getUNDEF(ResVT);
if (NumOfOperands > 2) {
// Specialize the cases when all, or all but one, of the operands are undef.
@@ -27012,6 +27107,9 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
+ unsigned InputSizeInBits = MaskVT.getSizeInBits();
+ unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
+ MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
bool ContainsZeros = false;
APInt Zeroable(NumMaskElts, false);
@@ -27027,7 +27125,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
- MaskVT.getScalarSizeInBits(), Mask,
+ MaskScalarSizeInBits, Mask,
0, Zeroable, Subtarget);
if (0 < ShiftAmt) {
PermuteImm = (unsigned)ShiftAmt;
@@ -27043,10 +27141,6 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
return SM_SentinelUndef <= M && M < (int)NumMaskElts;
}) && "Expected unary shuffle");
- unsigned InputSizeInBits = MaskVT.getSizeInBits();
- unsigned MaskScalarSizeInBits = InputSizeInBits / Mask.size();
- MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
-
// Handle PSHUFLW/PSHUFHW repeated patterns.
if (MaskScalarSizeInBits == 16) {
SmallVector<int, 4> RepeatedMask;
@@ -35072,7 +35166,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
/// that is commonly recognized as an idiom (has no register dependency), so
/// that's better/smaller than loading a splat 1 constant.
static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB &&
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Unexpected opcode for increment/decrement transform");
// Pseudo-legality check: getOnesVector() expects one of these types, so bail
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 01a70323224c..cc5c09cbf0e5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -185,6 +185,20 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
v2f64x_info>;
+class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
+ ValueType _vt> {
+ RegisterClass KRC = _krc;
+ RegisterClass KRCWM = _krcwm;
+ ValueType KVT = _vt;
+}
+
+def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
+def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
+def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
+def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
+def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
+def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
+
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
@@ -1735,17 +1749,217 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
-let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPGTDZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
-def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPEQDZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
-}
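+// The following multiclasses add patterns that fold the zero-extension of a
+// compare result (an insert_subvector into an all-zeros mask vector) into
+// the compare instruction itself: the compare already zeroes the upper bits
+// of the k-register, so only a COPY_TO_REGCLASS to the wider mask class is
+// needed.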
+multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
+ _.RC:$src1, _.RC:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src2))))))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+}
+}
+
+multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds>
+ : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+}
+}
+
+// VPCMPEQB - i8
+defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPEQW - i16
+defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQWZ", [HasBWI]>;
+
+// VPCMPEQD - i32
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQDZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQDZ", [HasAVX512]>;
+
+// VPCMPEQQ - i64
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQQZ", [HasAVX512]>;
+
+// VPCMPGTB - i8
+defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPGTW - i16
+defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTWZ", [HasBWI]>;
+
+// VPCMPGTD - i32
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTDZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTDZ", [HasAVX512]>;
+
+// VPCMPGTQ - i64
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTQZ", [HasAVX512]>;
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
@@ -1908,6 +2122,237 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
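+
+// Analogous lowering patterns for the compares that take an immediate
+// condition code: fold the zero-extension of the result into the compare
+// instruction itself.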
+multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
+ _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src2))),
+ imm:$cc)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
+ _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+}
+
+multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds>
+ : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
+ _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+}
+
+// VPCMPB - i8
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
+ "VPCMPBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
+ "VPCMPBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
+ "VPCMPBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPW - i16
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
+ "VPCMPWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
+ "VPCMPWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
+ "VPCMPWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
+ "VPCMPWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
+ "VPCMPWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
+ "VPCMPWZ", [HasBWI]>;
+
+// VPCMPD - i32
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
+ "VPCMPDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
+ "VPCMPDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm,
+ "VPCMPDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm,
+ "VPCMPDZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm,
+ "VPCMPDZ", [HasAVX512]>;
+
+// VPCMPQ - i64
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm,
+ "VPCMPQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm,
+ "VPCMPQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm,
+ "VPCMPQZ", [HasAVX512]>;
+
+// VPCMPUB - i8
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu,
+ "VPCMPUBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu,
+ "VPCMPUBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu,
+ "VPCMPUBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPUW - i16
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu,
+ "VPCMPUWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu,
+ "VPCMPUWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu,
+ "VPCMPUWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu,
+ "VPCMPUWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu,
+ "VPCMPUWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu,
+ "VPCMPUWZ", [HasBWI]>;
+
+// VPCMPUD - i32
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu,
+ "VPCMPUDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu,
+ "VPCMPUDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu,
+ "VPCMPUDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu,
+ "VPCMPUDZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu,
+ "VPCMPUDZ", [HasAVX512]>;
+
+// VPCMPUQ - i64
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu,
+ "VPCMPUQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu,
+ "VPCMPUQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu,
+ "VPCMPUQZ", [HasAVX512]>;
+
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
@@ -1998,21 +2443,108 @@ defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
-def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
- (COPY_TO_REGCLASS (VCMPPSZrri
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
- (COPY_TO_REGCLASS (VPCMPDZrri
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
- (COPY_TO_REGCLASS (VPCMPUDZrri
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc), VK8)>;
+multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ string InstrStr, list<Predicate> Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+}
+
+multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ string InstrStr, list<Predicate> Preds>
+ : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
+
+let Predicates = Preds in
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc,
+ (i32 FROUND_NO_EXC))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+
+
+// VCMPPS - f32
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ",
+ [HasAVX512]>;
+defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ",
+ [HasAVX512]>;
+
+// VCMPPD - f64
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ",
+ [HasAVX512]>;
+defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ",
+ [HasAVX512]>;
+defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ",
+ [HasAVX512]>;
// ----------------------------------------------------------------
// FPClass
@@ -2498,6 +3030,69 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
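+
+// Without VLX, lower 256-bit compares by widening the operands to 512 bits
+// (INSERT_SUBREG into an IMPLICIT_DEF) and using the 512-bit compare; the
+// zero-extended forms clear the upper mask bits with a KSHIFTL/KSHIFTR-by-8
+// pair.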
+multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
+def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ (i8 8)), (i8 8))>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (and VK8:$mask,
+ (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
+ (COPY_TO_REGCLASS VK8:$mask, VK16),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ (i8 8)), (i8 8))>;
+}
+
+multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
+ AVX512VLVectorVTInfo _> {
+def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ imm:$cc), VK8)>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ imm:$cc),
+ (i8 8)), (i8 8))>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (and VK8:$mask,
+ (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
+ (COPY_TO_REGCLASS VK8:$mask, VK16),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ imm:$cc),
+ (i8 8)), (i8 8))>;
+}
+
+let Predicates = [HasAVX512, NoVLX] in {
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
+
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
+}
+
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index f98c2a7e802d..e34a90e975b8 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -75,6 +75,8 @@ private:
bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -270,6 +272,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
return true;
if (selectUadde(I, MRI, MF))
return true;
+ if (selectMergeValues(I, MRI, MF))
+ return true;
if (selectExtract(I, MRI, MF))
return true;
if (selectInsert(I, MRI, MF))
@@ -914,6 +918,55 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
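+
+// Select G_MERGE_VALUES by emitting a subregister insert for the first source
+// and a chain of G_INSERT instructions (selected recursively) for the
+// remaining sources, each placed at its offset within the destination.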
+bool X86InstructionSelector::selectMergeValues(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_MERGE_VALUES)
+ return false;
+
+  // Split the merge into a sequence of inserts.
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned SrcReg0 = I.getOperand(1).getReg();
+
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcReg0);
+ unsigned SrcSize = SrcTy.getSizeInBits();
+
+ const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+
+  // For the first source operand, use emitInsertSubreg.
+ unsigned DefReg = MRI.createGenericVirtualRegister(DstTy);
+ MRI.setRegBank(DefReg, RegBank);
+ if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
+ return false;
+
+ for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
+
+ unsigned Tmp = MRI.createGenericVirtualRegister(DstTy);
+ MRI.setRegBank(Tmp, RegBank);
+
+ MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::G_INSERT), Tmp)
+ .addReg(DefReg)
+ .addReg(I.getOperand(Idx).getReg())
+ .addImm((Idx - 1) * SrcSize);
+
+ DefReg = Tmp;
+
+ if (!select(InsertInst))
+ return false;
+ }
+
+ MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), DstReg)
+ .addReg(DefReg);
+
+ if (!select(CopyInst))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+}
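Note on the selection above: a G_MERGE_VALUES of N sources is rewritten as a chain of G_INSERTs, each source landing at bit offset (Idx - 1) * SrcSize, followed by a COPY into the destination. A minimal standalone sketch of that offset arithmetic (plain C++, not the LLVM API; the 32-into-64-bit widths are an assumed example):

#include <cstdint>
#include <cstdio>

// Merge two 32-bit pieces into one 64-bit value the same way the G_INSERT
// chain does: operand Idx lands at bit offset (Idx - 1) * SrcSize.
static uint64_t mergeValues(uint32_t Src1, uint32_t Src2) {
  const unsigned SrcSize = 32;
  uint64_t Def = (uint64_t)Src1;                 // first source, via the subreg insert
  Def |= (uint64_t)Src2 << ((2 - 1) * SrcSize);  // second source, via G_INSERT
  return Def;
}

int main() {
  printf("0x%016llx\n", (unsigned long long)mergeValues(0xdeadbeefu, 0xcafef00du));
  return 0;
}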
InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &Subtarget,
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index a584eabcc1b2..a5fa3340c3f1 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -56,7 +56,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- for (unsigned BinOp : {G_ADD, G_SUB, G_MUL})
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
for (auto Ty : {s8, s16, s32})
setAction({BinOp, Ty}, Legal);
@@ -117,7 +117,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- for (unsigned BinOp : {G_ADD, G_SUB, G_MUL})
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
for (auto Ty : {s8, s16, s32, s64})
setAction({BinOp, Ty}, Legal);
@@ -228,10 +228,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX() {
for (auto Ty : {v8s32, v4s64})
setAction({MemOp, Ty}, Legal);
- for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
+ for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) {
setAction({G_INSERT, Ty}, Legal);
- for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
+ setAction({G_EXTRACT, 1, Ty}, Legal);
+ }
+ for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) {
setAction({G_INSERT, 1, Ty}, Legal);
+ setAction({G_EXTRACT, Ty}, Legal);
+ }
}
void X86LegalizerInfo::setLegalizerInfoAVX2() {
@@ -280,10 +284,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() {
for (auto Ty : {v16s32, v8s64})
setAction({MemOp, Ty}, Legal);
- for (auto Ty : {v64s8, v32s16, v16s32, v8s64})
+ for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) {
setAction({G_INSERT, Ty}, Legal);
- for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64})
+ setAction({G_EXTRACT, 1, Ty}, Legal);
+ }
+ for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) {
setAction({G_INSERT, 1, Ty}, Legal);
+ setAction({G_EXTRACT, Ty}, Legal);
+ }
/************ VLX *******************/
if (!Subtarget.hasVLX())
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e36a47506ba0..24845beac22d 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -11,10 +11,23 @@
//
//===----------------------------------------------------------------------===//
+#include "X86.h"
+
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "X86CallLowering.h"
+#include "X86LegalizerInfo.h"
+#include "X86RegisterBankInfo.h"
+#endif
#include "X86Subtarget.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#endif
#include "llvm/IR/Attributes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
@@ -336,6 +349,35 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+
+struct X86GISelActualAccessor : public GISelAccessor {
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+};
+
+} // end anonymous namespace
+#endif
+
X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const X86TargetMachine &TM,
unsigned StackAlignOverride)
@@ -360,6 +402,19 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
setPICStyle(PICStyles::StubPIC);
else if (isTargetELF())
setPICStyle(PICStyles::GOT);
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ X86GISelActualAccessor *GISel = new X86GISelActualAccessor();
+
+ GISel->CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
+ GISel->Legalizer.reset(new X86LegalizerInfo(*this, TM));
+
+ auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());
+ GISel->RegBankInfo.reset(RBI);
+ GISel->InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
+#endif
+ setGISelAccessor(*GISel);
}
const CallLowering *X86Subtarget::getCallLowering() const {
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 550e95c39ab5..fa0afe29586b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -58,7 +58,7 @@ protected:
};
enum X86ProcFamilyEnum {
- Others, IntelAtom, IntelSLM
+ Others, IntelAtom, IntelSLM, IntelGLM
};
/// X86 processor family: Intel Atom, and others
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index a9f42cacf788..8d891c983fab 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -15,9 +15,6 @@
#include "X86.h"
#include "X86CallLowering.h"
#include "X86LegalizerInfo.h"
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-#include "X86RegisterBankInfo.h"
-#endif
#include "X86MacroFusion.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
@@ -31,7 +28,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDepsFix.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -212,35 +208,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
X86TargetMachine::~X86TargetMachine() = default;
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
-
-struct X86GISelActualAccessor : public GISelAccessor {
- std::unique_ptr<CallLowering> CallLoweringInfo;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
-
- const CallLowering *getCallLowering() const override {
- return CallLoweringInfo.get();
- }
-
- const InstructionSelector *getInstructionSelector() const override {
- return InstSelector.get();
- }
-
- const LegalizerInfo *getLegalizerInfo() const override {
- return Legalizer.get();
- }
-
- const RegisterBankInfo *getRegBankInfo() const override {
- return RegBankInfo.get();
- }
-};
-
-} // end anonymous namespace
-#endif
-
const X86Subtarget *
X86TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -280,20 +247,6 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
resetTargetOptions(F);
I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
Options.StackAlignmentOverride);
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- X86GISelActualAccessor *GISel = new X86GISelActualAccessor();
-
- GISel->CallLoweringInfo.reset(new X86CallLowering(*I->getTargetLowering()));
- GISel->Legalizer.reset(new X86LegalizerInfo(*I, *this));
-
- auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo());
- GISel->RegBankInfo.reset(RBI);
- GISel->InstSelector.reset(createX86InstructionSelector(
- *this, *I, *RBI));
-#endif
- I->setGISelAccessor(*GISel);
}
return I.get();
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 1bf267d34ec2..aaa6d58bd134 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -40,6 +40,7 @@ public:
~X86TargetMachine() override;
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
+ const X86Subtarget *getSubtargetImpl() const = delete;
TargetIRAnalysis getTargetIRAnalysis() override;
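The deleted no-argument overload above is the usual way to force callers onto the per-function subtarget query. A minimal sketch of the idiom with hypothetical Base/Derived types (not the LLVM classes; the real overload takes a Function reference):

struct Base {
  const int *getImpl() const { return nullptr; }       // legacy no-argument form
  const int *getImpl(int Key) const { return nullptr; }
};

struct Derived : Base {
  const int *getImpl(int Key) const { return &Value; }
  const int *getImpl() const = delete; // shadows and forbids the no-argument form
  int Value = 0;
};

// Derived D; D.getImpl(1);  // fine
// Derived D; D.getImpl();   // error: use of deleted function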
diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h
index 5c666bdfea1f..9a8cc5a2591c 100644
--- a/lib/Transforms/Coroutines/CoroInstr.h
+++ b/lib/Transforms/Coroutines/CoroInstr.h
@@ -58,10 +58,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_subfn_addr;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -70,10 +70,10 @@ public:
class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst {
public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_alloc;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -175,10 +175,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_id;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -187,10 +187,10 @@ public:
class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_frame;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -203,10 +203,10 @@ public:
Value *getFrame() const { return getArgOperand(FrameArg); }
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_free;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -221,10 +221,10 @@ public:
Value *getMem() const { return getArgOperand(MemArg); }
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_begin;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -233,10 +233,10 @@ public:
class LLVM_LIBRARY_VISIBILITY CoroSaveInst : public IntrinsicInst {
public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_save;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -254,10 +254,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_promise;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -279,10 +279,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_suspend;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -291,10 +291,10 @@ public:
class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst {
public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_size;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -310,10 +310,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_end;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
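The `inline` keyword dropped throughout this header is redundant: a member function defined inside its class body is implicitly inline. A small illustration with a hypothetical class (not the coroutine intrinsics):

struct Example {
  // Implicitly inline because the definition appears in the class body;
  // writing "static inline" here adds nothing.
  static bool classof(const int *V) { return V != nullptr; }
};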
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 087a8aa2c624..5b1b58b89c32 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -56,10 +56,6 @@ RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
static cl::opt<bool>
-RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
- cl::desc("Run the BB vectorization passes"));
-
-static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
@@ -138,8 +134,8 @@ static cl::opt<int> PreInlineThreshold(
"(default = 75)"));
static cl::opt<bool> EnableEarlyCSEMemSSA(
- "enable-earlycse-memssa", cl::init(false), cl::Hidden,
- cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = off)"));
+ "enable-earlycse-memssa", cl::init(true), cl::Hidden,
+ cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)"));
static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
@@ -166,7 +162,6 @@ PassManagerBuilder::PassManagerBuilder() {
Inliner = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
- BBVectorize = RunBBVectorization;
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
@@ -263,11 +258,12 @@ void PassManagerBuilder::populateFunctionPassManager(
// Do PGO instrumentation generation or use pass as the option specified.
void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
- if (!EnablePGOInstrGen && PGOInstrUse.empty())
+ if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
return;
// Perform the preinline and cleanup passes for O1 and above.
// And avoid doing them if optimizing for size.
- if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) {
+ if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner &&
+ PGOSampleUse.empty()) {
// Create preinline pass. We construct an InlineParams object and specify
// the threshold here to avoid the command line options of the regular
// inliner to influence pre-inlining. The only fields of InlineParams we
@@ -383,26 +379,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (RerollLoops)
MPM.add(createLoopRerollPass());
- if (!RunSLPAfterLoopVectorization) {
- if (SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- addInstructionCombiningPass(MPM);
- addExtensionsToPM(EP_Peephole, MPM);
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(NewGVN
- ? createNewGVNPass()
- : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass(OptLevel));
- }
- }
+ if (!RunSLPAfterLoopVectorization && SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
@@ -634,28 +612,10 @@ void PassManagerBuilder::populateModulePassManager(
addInstructionCombiningPass(MPM);
}
- if (RunSLPAfterLoopVectorization) {
- if (SLPVectorize) {
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
- if (OptLevel > 1 && ExtraVectorizerPasses) {
- MPM.add(createEarlyCSEPass());
- }
- }
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- addInstructionCombiningPass(MPM);
- addExtensionsToPM(EP_Peephole, MPM);
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(NewGVN
- ? createNewGVNPass()
- : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass(OptLevel));
+ if (RunSLPAfterLoopVectorization && SLPVectorize) {
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ MPM.add(createEarlyCSEPass());
}
}
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 656421ee58df..ac4765f96075 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -1484,7 +1484,8 @@ bool SampleProfileLoader::runOnFunction(Function &F) {
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ModuleAnalysisManager &AM) {
- SampleProfileLoader SampleLoader(SampleProfileFile);
+ SampleProfileLoader SampleLoader(
+ ProfileFileName.empty() ? SampleProfileFile : ProfileFileName);
SampleLoader.doInitialization(M);
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 802f470ffe1f..8d494fe9cde2 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -371,6 +371,7 @@ void splitAndWriteThinLTOBitcode(
/*GenerateHash=*/true, &ModHash);
W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
&MergedMIndex);
+ W.writeSymtab();
W.writeStrtab();
OS << Buffer;
@@ -385,6 +386,7 @@ void splitAndWriteThinLTOBitcode(
/*GenerateHash=*/false, &ModHash);
W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
&MergedMIndex);
+ W2.writeSymtab();
W2.writeStrtab();
*ThinLinkOS << Buffer;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index d3d8cefe9735..db98be2c98f5 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2301,10 +2301,10 @@ static Instruction *foldXorToXor(BinaryOperator &I) {
// (~B | A) ^ (~A | B) -> A ^ B
// (~A | B) ^ (A | ~B) -> A ^ B
// (B | ~A) ^ (A | ~B) -> A ^ B
- if ((match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) ||
- (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B)))))) {
+ if ((match(Op0, m_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) ||
+ (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_c_Or(m_Specific(A), m_Not(m_Specific(B)))))) {
I.setOperand(0, A);
I.setOperand(1, B);
return &I;
@@ -2314,10 +2314,10 @@ static Instruction *foldXorToXor(BinaryOperator &I) {
// (~B & A) ^ (~A & B) -> A ^ B
// (~A & B) ^ (A & ~B) -> A ^ B
// (B & ~A) ^ (A & ~B) -> A ^ B
- if ((match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) ||
- (match(Op0, m_c_And(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))) {
+ if ((match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))) ||
+ (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))))) {
I.setOperand(0, A);
I.setOperand(1, B);
return &I;
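The change in these two hunks only moves the commutative matcher from the first operand to the second; the algebraic identities in the comments are unchanged. A quick exhaustive check over 8-bit values (plain C++, independent of InstCombine) confirms both the OR and AND forms:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t a = (uint8_t)A, b = (uint8_t)B;
      // (~B | A) ^ (~A | B) -> A ^ B
      assert(uint8_t((uint8_t(~b) | a) ^ (uint8_t(~a) | b)) == uint8_t(a ^ b));
      // (~B & A) ^ (~A & B) -> A ^ B
      assert(uint8_t((uint8_t(~b) & a) ^ (uint8_t(~a) & b)) == uint8_t(a ^ b));
    }
  }
  return 0;
}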
@@ -2456,10 +2456,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ // not (cmp A, B) = !cmp A, B
ICmpInst::Predicate Pred;
- if (match(Op0, m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))) &&
- match(Op1, m_AllOnes())) {
+ if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) {
cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
return replaceInstUsesWith(I, Op0);
}
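The rewritten guard matches the whole `xor ..., true` as a logical not of a one-use compare; the fold itself is ordinary predicate inversion, e.g. not(icmp slt a, b) becomes icmp sge a, b. A tiny standalone check (plain C++):

#include <cassert>

int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b)
      assert((!(a < b)) == (a >= b)); // not (icmp slt a, b) == icmp sge a, b
  return 0;
}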
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index dbed7ad4eae8..3770021de100 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1985,7 +1985,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *X = nullptr;
// bitreverse(bitreverse(x)) -> x
- if (match(IIOperand, m_Intrinsic<Intrinsic::bitreverse>(m_Value(X))))
+ if (match(IIOperand, m_BitReverse(m_Value(X))))
return replaceInstUsesWith(CI, X);
break;
}
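m_BitReverse is simply the pattern-match shorthand for the bitreverse intrinsic; the fold relies on the fact that reversing bits twice is the identity. A small standalone check with an 8-bit software reverse (plain C++, not the intrinsic):

#include <cassert>
#include <cstdint>

static uint8_t bitreverse8(uint8_t X) {
  uint8_t R = 0;
  for (int I = 0; I < 8; ++I)
    R = uint8_t((R << 1) | ((X >> I) & 1));
  return R;
}

int main() {
  for (unsigned V = 0; V < 256; ++V)
    assert(bitreverse8(bitreverse8(uint8_t(V))) == uint8_t(V));
  return 0;
}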
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6ad32490a328..58b8b2f52629 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -112,10 +112,10 @@ static bool subWithOverflow(Constant *&Result, Constant *In1,
/// Given an icmp instruction, return true if any use of this comparison is a
/// branch on sign bit comparison.
-static bool isBranchOnSignBitCheck(ICmpInst &I, bool isSignBit) {
+static bool hasBranchUse(ICmpInst &I) {
for (auto *U : I.users())
if (isa<BranchInst>(U))
- return isSignBit;
+ return true;
return false;
}
@@ -1448,12 +1448,13 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
// of a test and branch. So we avoid canonicalizing in such situations
// because test and branch instruction has better branch displacement
// than compare and branch instruction.
- if (!isBranchOnSignBitCheck(Cmp, IsSignBit) && !Cmp.isEquality()) {
- if (auto *AI = Intersection.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI));
- if (auto *AD = Difference.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD));
- }
+ if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
+ return nullptr;
+
+ if (auto *AI = Intersection.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI));
+ if (auto *AD = Difference.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD));
}
return nullptr;
@@ -3301,12 +3302,12 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
return nullptr;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ const CmpInst::Predicate Pred = I.getPredicate();
Value *A, *B, *C, *D;
if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
Value *OtherVal = A == Op1 ? B : A;
- return new ICmpInst(I.getPredicate(), OtherVal,
- Constant::getNullValue(A->getType()));
+ return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
}
if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
@@ -3316,26 +3317,25 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
Op1->hasOneUse()) {
Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue());
Value *Xor = Builder->CreateXor(C, NC);
- return new ICmpInst(I.getPredicate(), A, Xor);
+ return new ICmpInst(Pred, A, Xor);
}
// A^B == A^D -> B == D
if (A == C)
- return new ICmpInst(I.getPredicate(), B, D);
+ return new ICmpInst(Pred, B, D);
if (A == D)
- return new ICmpInst(I.getPredicate(), B, C);
+ return new ICmpInst(Pred, B, C);
if (B == C)
- return new ICmpInst(I.getPredicate(), A, D);
+ return new ICmpInst(Pred, A, D);
if (B == D)
- return new ICmpInst(I.getPredicate(), A, C);
+ return new ICmpInst(Pred, A, C);
}
}
if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
// A == (A^B) -> B == 0
Value *OtherVal = A == Op0 ? B : A;
- return new ICmpInst(I.getPredicate(), OtherVal,
- Constant::getNullValue(A->getType()));
+ return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
}
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
@@ -3380,8 +3380,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
APInt Pow2 = Cst1->getValue() + 1;
if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
- return new ICmpInst(I.getPredicate(), A,
- Builder->CreateTrunc(B, A->getType()));
+ return new ICmpInst(Pred, A, Builder->CreateTrunc(B, A->getType()));
}
// (A >> C) == (B >> C) --> (A^B) u< (1 << C)
@@ -3393,12 +3392,11 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
unsigned TypeBits = Cst1->getBitWidth();
unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
if (ShAmt < TypeBits && ShAmt != 0) {
- ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
- ? ICmpInst::ICMP_UGE
- : ICmpInst::ICMP_ULT;
+ ICmpInst::Predicate NewPred =
+ Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
- return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
+ return new ICmpInst(NewPred, Xor, Builder->getInt(CmpVal));
}
}
@@ -3412,8 +3410,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
I.getName() + ".mask");
- return new ICmpInst(I.getPredicate(), And,
- Constant::getNullValue(Cst1->getType()));
+ return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
}
}
@@ -3437,7 +3434,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
CmpV <<= ShAmt;
Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
- return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
+ return new ICmpInst(Pred, Mask, Builder->getInt(CmpV));
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 1b0fe84dd4dd..87f11467b95e 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -131,11 +131,10 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
return true;
// A vector of constant integers can be inverted easily.
- Constant *CV;
- if (V->getType()->isVectorTy() && match(V, PatternMatch::m_Constant(CV))) {
+ if (V->getType()->isVectorTy() && isa<Constant>(V)) {
unsigned NumElts = V->getType()->getVectorNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
- Constant *Elt = CV->getAggregateElement(i);
+ Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
if (!Elt)
return false;
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index ca370c73fca4..26bee204e5a4 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -661,6 +661,9 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
if (NumElements == 1) {
LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
".unpack");
+ AAMDNodes AAMD;
+ LI.getAAMetadata(AAMD);
+ NewLoad->setAAMetadata(AAMD);
return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -690,6 +693,10 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
Name + ".elt");
auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
auto *L = IC.Builder->CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
+ // Propagate AA metadata. It'll still be valid on the narrowed load.
+ AAMDNodes AAMD;
+ LI.getAAMetadata(AAMD);
+ L->setAAMetadata(AAMD);
V = IC.Builder->CreateInsertValue(V, L, i);
}
@@ -702,6 +709,9 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
auto NumElements = AT->getNumElements();
if (NumElements == 1) {
LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
+ AAMDNodes AAMD;
+ LI.getAAMetadata(AAMD);
+ NewLoad->setAAMetadata(AAMD);
return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -734,6 +744,9 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
Name + ".elt");
auto *L = IC.Builder->CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
Name + ".unpack");
+ AAMDNodes AAMD;
+ LI.getAAMetadata(AAMD);
+ L->setAAMetadata(AAMD);
V = IC.Builder->CreateInsertValue(V, L, i);
Offset += EltSize;
}
@@ -1192,7 +1205,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
AddrName);
auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
- IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ llvm::Instruction *NS =
+ IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ AAMDNodes AAMD;
+ SI.getAAMetadata(AAMD);
+ NS->setAAMetadata(AAMD);
}
return true;
@@ -1239,7 +1256,10 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
AddrName);
auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
auto EltAlign = MinAlign(Align, Offset);
- IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ Instruction *NS = IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ AAMDNodes AAMD;
+ SI.getAAMetadata(AAMD);
+ NS->setAAMetadata(AAMD);
Offset += EltSize;
}
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 365c4ba75154..579639a6194e 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -227,8 +227,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (I.hasNoUnsignedWrap())
Shl->setHasNoUnsignedWrap();
if (I.hasNoSignedWrap()) {
- uint64_t V;
- if (match(NewCst, m_ConstantInt(V)) && V != Width - 1)
+ const APInt *V;
+ if (match(NewCst, m_APInt(V)) && *V != Width - 1)
Shl->setHasNoSignedWrap();
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 33951e66497a..80c6595904e1 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1167,6 +1167,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *I = canonicalizeSelectToShuffle(SI))
return I;
+ // Canonicalize a one-use integer compare with a non-canonical predicate by
+ // inverting the predicate and swapping the select operands. This matches a
+ // compare canonicalization for conditional branches.
+ // TODO: Should we do the same for FP compares?
+ CmpInst::Predicate Pred;
+ if (match(CondVal, m_OneUse(m_ICmp(Pred, m_Value(), m_Value()))) &&
+ !isCanonicalPredicate(Pred)) {
+ // Swap true/false values and condition.
+ CmpInst *Cond = cast<CmpInst>(CondVal);
+ Cond->setPredicate(CmpInst::getInversePredicate(Pred));
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ SI.swapProfMetadata();
+ Worklist.Add(Cond);
+ return &SI;
+ }
+
if (SelType->getScalarType()->isIntegerTy(1) &&
TrueVal->getType() == CondVal->getType()) {
if (match(TrueVal, m_One())) {
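The new canonicalization above relies on the fact that inverting the compare and swapping the select arms preserves the result; in source terms, `x > y ? a : b` is the same as `x <= y ? b : a`. A quick standalone check (plain C++):

#include <cassert>

int main() {
  int Vals[] = {-3, 0, 7};
  for (int X : Vals)
    for (int Y : Vals)
      for (int A : Vals)
        for (int B : Vals)
          // select (icmp sgt X, Y), A, B  ==  select (icmp sle X, Y), B, A
          assert((X > Y ? A : B) == (X <= Y ? B : A));
  return 0;
}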
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 02fac4fb37a4..723414635d6f 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2425,9 +2425,15 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
Builder->SetInsertPoint(L);
Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
L->getPointerOperand(), Indices);
+ Instruction *NL = Builder->CreateLoad(GEP);
+ // Whatever aliasing information we had for the original load must also
+ // hold for the smaller load, so propagate the annotations.
+ AAMDNodes Nodes;
+ L->getAAMetadata(Nodes);
+ NL->setAAMetadata(Nodes);
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use replaceInstUsesWith().
- return replaceInstUsesWith(EV, Builder->CreateLoad(GEP));
+ return replaceInstUsesWith(EV, NL);
}
// We could simplify extracts from other values. Note that nested extracts may
// already be simplified implicitly by the above: extract (extract (insert) )
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index c3810366bf22..a49c9b68c97d 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -38,6 +38,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -340,6 +341,49 @@ void ConstantHoistingPass::collectConstantCandidates(
}
}
+
+/// \brief Check the operand for instruction Inst at index Idx.
+void ConstantHoistingPass::collectConstantCandidates(
+ ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx) {
+ Value *Opnd = Inst->getOperand(Idx);
+
+ // Visit constant integers.
+ if (auto ConstInt = dyn_cast<ConstantInt>(Opnd)) {
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+ return;
+ }
+
+ // Visit cast instructions that have constant integers.
+ if (auto CastInst = dyn_cast<Instruction>(Opnd)) {
+ // Only visit cast instructions, which have been skipped. All other
+ // instructions should have already been visited.
+ if (!CastInst->isCast())
+ return;
+
+ if (auto *ConstInt = dyn_cast<ConstantInt>(CastInst->getOperand(0))) {
+ // Pretend the constant is directly used by the instruction and ignore
+ // the cast instruction.
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+ return;
+ }
+ }
+
+ // Visit constant expressions that have constant integers.
+ if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+ // Only visit constant cast expressions.
+ if (!ConstExpr->isCast())
+ return;
+
+ if (auto ConstInt = dyn_cast<ConstantInt>(ConstExpr->getOperand(0))) {
+ // Pretend the constant is directly used by the instruction and ignore
+ // the constant expression.
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+ return;
+ }
+ }
+}
+
+
/// \brief Scan the instruction for expensive integer constants and record them
/// in the constant candidate vector.
void ConstantHoistingPass::collectConstantCandidates(
@@ -365,44 +409,25 @@ void ConstantHoistingPass::collectConstantCandidates(
if (AI && AI->isStaticAlloca())
return;
- // Scan all operands.
- for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
- Value *Opnd = Inst->getOperand(Idx);
-
- // Visit constant integers.
- if (auto ConstInt = dyn_cast<ConstantInt>(Opnd)) {
- collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
- continue;
- }
-
- // Visit cast instructions that have constant integers.
- if (auto CastInst = dyn_cast<Instruction>(Opnd)) {
- // Only visit cast instructions, which have been skipped. All other
- // instructions should have already been visited.
- if (!CastInst->isCast())
- continue;
-
- if (auto *ConstInt = dyn_cast<ConstantInt>(CastInst->getOperand(0))) {
- // Pretend the constant is directly used by the instruction and ignore
- // the cast instruction.
- collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
- continue;
+ // Constants in GEPs that index into a struct type should not be hoisted.
+ if (isa<GetElementPtrInst>(Inst)) {
+ gep_type_iterator GTI = gep_type_begin(Inst);
+
+ // Collect constant for first operand.
+ collectConstantCandidates(ConstCandMap, Inst, 0);
+ // Scan the remaining operands.
+ for (unsigned Idx = 1, E = Inst->getNumOperands(); Idx != E; ++Idx, ++GTI) {
+ // Only collect constants that index into a non struct type.
+ if (!GTI.isStruct()) {
+ collectConstantCandidates(ConstCandMap, Inst, Idx);
}
}
+ return;
+ }
- // Visit constant expressions that have constant integers.
- if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
- // Only visit constant cast expressions.
- if (!ConstExpr->isCast())
- continue;
-
- if (auto ConstInt = dyn_cast<ConstantInt>(ConstExpr->getOperand(0))) {
- // Pretend the constant is directly used by the instruction and ignore
- // the constant expression.
- collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
- continue;
- }
- }
+ // Scan all operands.
+ for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
+ collectConstantCandidates(ConstCandMap, Inst, Idx);
} // end of for all operands
}
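The struct-index restriction exists because GEP indices that select a struct field must be compile-time constants in the IR, so hoisting them into a register would not produce valid code; only array and pointer indices may vary. In source terms (hedged illustration, hypothetical types):

struct Packet {
  int Header;
  int Payload[64];
};

int readPayload(Packet *P, long I) {
  // The offset selecting the Payload field must remain a literal constant in
  // the GEP; only the array index I can live in a register and be hoisted.
  return P->Payload[I];
}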
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 2f96c3064b86..a40c22c3fce9 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -917,7 +917,6 @@ LoopConstrainer::calculateSubRanges() const {
// I think we can be more aggressive here and make this nuw / nsw if the
// addition that feeds into the icmp for the latch's terminating branch is nuw
// / nsw. In any case, a wrapping 2's complement addition is safe.
- ConstantInt *One = ConstantInt::get(Ty, 1);
const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
@@ -948,8 +947,9 @@ LoopConstrainer::calculateSubRanges() const {
// will be an empty range. Returning an empty range is always safe.
//
- Smallest = SE.getAddExpr(End, SE.getSCEV(One));
- Greatest = SE.getAddExpr(Start, SE.getSCEV(One));
+ const SCEV *One = SE.getOne(Ty);
+ Smallest = SE.getAddExpr(End, One);
+ Greatest = SE.getAddExpr(Start, One);
}
auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 62aa6ee48069..530a68424d5c 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -131,7 +131,7 @@ static const unsigned NoThreshold = UINT_MAX;
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
- Loop *L, const TargetTransformInfo &TTI, int OptLevel,
+ Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound) {
@@ -158,7 +158,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.AllowPeeling = true;
// Override with any target specific settings
- TTI.getUnrollingPreferences(L, UP);
+ TTI.getUnrollingPreferences(L, SE, UP);
// Apply size attributes
if (L->getHeader()->getParent()->optForSize()) {
@@ -699,7 +699,7 @@ static uint64_t getUnrolledLoopSize(
// Calculates unroll count and writes it to UP.Count.
static bool computeUnrollCount(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
+ ScalarEvolution &SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
// Check for explicit Count.
@@ -770,7 +770,7 @@ static bool computeUnrollCount(
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, *SE, TTI,
+ L, FullUnrollTripCount, DT, SE, TTI,
UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
@@ -836,6 +836,8 @@ static bool computeUnrollCount(
} else {
UP.Count = TripCount;
}
+ if (UP.Count > UP.MaxCount)
+ UP.Count = UP.MaxCount;
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
UP.Count != TripCount)
ORE->emit(
@@ -926,7 +928,7 @@ static bool computeUnrollCount(
}
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE, const TargetTransformInfo &TTI,
+ ScalarEvolution &SE, const TargetTransformInfo &TTI,
AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
bool PreserveLCSSA, int OptLevel,
Optional<unsigned> ProvidedCount,
@@ -948,8 +950,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
- ProvidedRuntime, ProvidedUpperBound);
+ L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
+ ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound);
// Exit early if unrolling is disabled.
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
return false;
@@ -977,8 +979,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
ExitingBlock = L->getExitingBlock();
if (ExitingBlock) {
- TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
+ TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
}
// If the loop contains a convergent operation, the prelude we'd add
@@ -1000,8 +1002,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// count.
bool MaxOrZero = false;
if (!TripCount) {
- MaxTripCount = SE->getSmallConstantMaxTripCount(L);
- MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+ MaxTripCount = SE.getSmallConstantMaxTripCount(L);
+ MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
// We can unroll by the upper bound amount if it's generally allowed or if
// we know that the loop is executed either the upper bound or zero times.
// (MaxOrZero unrolling keeps only the first loop test, so the number of
@@ -1030,7 +1032,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// Unroll the loop.
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
- TripMultiple, UP.PeelCount, LI, SE, &DT, &AC, &ORE,
+ TripMultiple, UP.PeelCount, LI, &SE, &DT, &AC, &ORE,
PreserveLCSSA))
return false;
@@ -1073,7 +1075,7 @@ public:
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -1157,7 +1159,7 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
if (!AllowPartialUnrolling)
AllowPartialParam = RuntimeParam = UpperBoundParam = false;
bool Changed = tryToUnrollLoop(
- &L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
+ &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
/*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
/*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
if (!Changed)
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 7a7624f77542..9cf01c6582b5 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -2423,8 +2423,7 @@ void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
AllTempInstructions.insert(Op);
PHIOfOpsPHIs[BB].push_back(Op);
TempToBlock[Op] = BB;
- if (ExistingValue)
- RealToTemp[ExistingValue] = Op;
+ RealToTemp[ExistingValue] = Op;
}
static bool okayForPHIOfOps(const Instruction *I) {
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 6da551bd7efd..cdba0062953f 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1894,6 +1894,8 @@ void ReassociatePass::EraseInst(Instruction *I) {
Op = Op->user_back();
RedoInsts.insert(Op);
}
+
+ MadeChange = true;
}
// Canonicalize expressions of the following form:
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index a52739bb76f7..a73e9aec0617 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1954,7 +1954,7 @@ static void rematerializeLiveValues(CallSite CS,
// to identify the newly generated AlternateRootPhi (.base version of phi)
// and RootOfChain (the original phi node itself) are the same, so that we
// can rematerialize the gep and casts. This is a workaround for the
- // deficieny in the findBasePointer algorithm.
+ // deficiency in the findBasePointer algorithm.
if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
continue;
// Now that the phi nodes are proved to be the same, assert that
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 80fbbeb6829b..4729f4ef5956 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2402,9 +2402,20 @@ private:
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ // Any !nonnull metadata or !range metadata on the old load is also valid
+ // on the new load. This is true in some cases even when the loads
+ // are different types, for example by mapping !nonnull metadata to
+ // !range metadata by modeling the null pointer constant converted to the
+ // integer type.
+ // FIXME: Add support for range metadata here. Currently the utilities
+ // for this don't propagate range metadata in trivial cases from one
+ // integer load to another, don't handle non-addrspace-0 null pointers
+ // correctly, and don't have any support for mapping ranges as the
+ // integer type becomes wider or narrower.
+ if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
+ copyNonnullMetadata(LI, N, *NewLI);
+
// Try to preserve nonnull metadata
- if (TargetTy->isPointerTy())
- NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
V = NewLI;
// If this is an integer load past the end of the slice (which means the
@@ -3580,10 +3591,11 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
- auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
+ auto AS = LI->getPointerAddressSpace();
+ auto *PartPtrTy = PartTy->getPointerTo(AS);
LoadInst *PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, BasePtr,
- APInt(DL.getPointerSizeInBits(), PartOffset),
+ APInt(DL.getPointerSizeInBits(AS), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 5d57ed9718fb..30d8856cfbef 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -59,6 +59,33 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
// Landing pads must be in the function where they were inserted for cleanup.
if (BB.isEHPad())
return false;
+ // Taking the address of a basic block moved to another function is illegal.
+ if (BB.hasAddressTaken())
+ return false;
+
+ // Don't hoist code that uses another basic block's address, as it's likely
+ // to lead to unexpected behavior, like cross-function jumps.
+ SmallPtrSet<User const *, 16> Visited;
+ SmallVector<User const *, 16> ToVisit;
+
+ for (Instruction const &Inst : BB)
+ ToVisit.push_back(&Inst);
+
+ while (!ToVisit.empty()) {
+ User const *Curr = ToVisit.pop_back_val();
+ if (!Visited.insert(Curr).second)
+ continue;
+ if (isa<BlockAddress const>(Curr))
+ return false; // even a reference to self is likely to be incompatible
+
+ if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB)
+ continue;
+
+ for (auto const &U : Curr->operands()) {
+ if (auto *UU = dyn_cast<User>(U))
+ ToVisit.push_back(UU);
+ }
+ }
// Don't hoist code containing allocas, invokes, or vastarts.
for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
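Blocks whose address is taken typically come from the GNU address-of-label extension; moving such a block into an extracted function would leave the taken address referring across function boundaries. A hedged illustration of the kind of code that the new check rejects (GNU C/C++ extension, hypothetical function):

int drain(int N) {
  void *Target = &&retry; // blockaddress-style use of a basic block
retry:
  if (N > 0) {
    --N;
    goto *Target;         // indirect branch through the taken address
  }
  return N;
}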
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 5f85e17927fa..9ad2b707e6b2 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -36,6 +36,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
@@ -45,6 +46,10 @@ using namespace llvm;
STATISTIC(NumRuntimeUnrolled,
"Number of loops unrolled with run-time trip counts");
+static cl::opt<bool> UnrollRuntimeMultiExit(
+ "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
+ cl::desc("Allow runtime unrolling for loops with multiple exits, when "
+ "epilog is generated"));
/// Connect the unrolling prolog code to the original loop.
/// The unrolling prolog code contains code to execute the
@@ -285,15 +290,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// If loop structure is cloned InsertTop should be new preheader, InsertBot
/// new loop exit.
-///
-static void CloneLoopBlocks(Loop *L, Value *NewIter,
- const bool CreateRemainderLoop,
- const bool UseEpilogRemainder,
- BasicBlock *InsertTop, BasicBlock *InsertBot,
- BasicBlock *Preheader,
- std::vector<BasicBlock *> &NewBlocks,
- LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- DominatorTree *DT, LoopInfo *LI) {
+/// Return the new cloned loop that is created when CreateRemainderLoop is true.
+static Loop *
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
+ const bool UseEpilogRemainder, BasicBlock *InsertTop,
+ BasicBlock *InsertBot, BasicBlock *Preheader,
+ std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@@ -418,7 +421,10 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
// Set operand 0 to refer to the loop id itself.
NewLoopID->replaceOperandWith(0, NewLoopID);
NewLoop->setLoopID(NewLoopID);
+ return NewLoop;
}
+ else
+ return nullptr;
}
/// Insert code in the prolog/epilog code when unrolling a loop with a
@@ -465,29 +471,52 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, bool PreserveLCSSA) {
// for now, only unroll loops that contain a single exit
- if (!L->getExitingBlock())
+ if (!UnrollRuntimeMultiExit && !L->getExitingBlock())
return false;
- // Make sure the loop is in canonical form, and there is a single
- // exit block only.
+ // Make sure the loop is in canonical form.
if (!L->isLoopSimplifyForm())
return false;
// Guaranteed by LoopSimplifyForm.
BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop
- if (!LatchExit)
+ if (!LatchExit && !UnrollRuntimeMultiExit)
return false;
+ // These are exit blocks other than the target of the latch exiting block.
+ SmallVector<BasicBlock *, 4> OtherExits;
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
// Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
- // targets of the Latch be the single exit block out of the loop. This needs
+ // targets of the Latch be an exit block out of the loop. This needs
// to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- assert((LatchBR->getSuccessor(0) == LatchExit ||
- LatchBR->getSuccessor(1) == LatchExit) &&
- "one of the loop latch successors should be "
- "the exit block!");
- (void)LatchBR;
+ assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) &&
+ "one of the loop latch successors should be the exit block!");
+ // Support runtime unrolling for multiple exit blocks and multiple exiting
+ // blocks.
+ if (!LatchExit) {
+ assert(UseEpilogRemainder && "Multi exit unrolling is currently supported "
+ "only when unrolling with an epilog remainder!");
+ LatchExit = LatchBR->getSuccessor(ExitIndex);
+ // We rely on LCSSA form being preserved when the exit blocks are
+ // transformed.
+ if (!PreserveLCSSA)
+ return false;
+ // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This
+ // will need updating the logic in connectEpilog.
+ if (!LatchExit->getSinglePredecessor())
+ return false;
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueExitBlocks(Exits);
+ for (auto *BB : Exits)
+ if (BB != LatchExit)
+ OtherExits.push_back(BB);
+ }
+
+ assert(LatchExit && "Latch Exit should exist!");
+
// Use Scalar Evolution to compute the trip count. This allows more loops to
// be unrolled than relying on induction var simplification.
if (!SE)
@@ -495,7 +524,11 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Only unroll loops with a computable trip count, and the trip count needs
// to be an int value (allowing a pointer type is a TODO item).
- const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
+ // We calculate the backedge count by using getExitCount on the Latch block,
+ // which is proven to be the only exiting block in this loop. This is the same as
+ // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
+ // exiting blocks).
+ const SCEV *BECountSC = SE->getExitCount(L, Latch);
if (isa<SCEVCouldNotCompute>(BECountSC) ||
!BECountSC->getType()->isIntegerTy())
return false;
@@ -508,7 +541,6 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
- BasicBlock *Header = L->getHeader();
BasicBlock *PreHeader = L->getLoopPreheader();
BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
const DataLayout &DL = Header->getModule()->getDataLayout();
@@ -650,8 +682,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// iterations. This function adds the appropriate CFG connections.
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
- CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
- InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
+ Loop *remainderLoop = CloneLoopBlocks(
+ L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot,
+ NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Insert the cloned blocks into the function.
F->getBasicBlockList().splice(InsertBot->getIterator(),
@@ -659,6 +692,42 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
NewBlocks[0]->getIterator(),
F->end());
+ // Now the loop blocks are cloned and the other exiting blocks from the
+ // remainder are connected to the original Loop's exit blocks. The remaining
+ // work is to update the phi nodes in the original loop, and take in the
+ // values from the cloned region. Also update the dominator info for
+ // OtherExits, since we have new edges into OtherExits.
+ for (auto *BB : OtherExits) {
+ for (auto &II : *BB) {
+
+ // Given we preserve LCSSA form, we know that the values used outside the
+ // loop will be used through these phi nodes at the exit blocks that are
+ // transformed below.
+ if (!isa<PHINode>(II))
+ break;
+ PHINode *Phi = cast<PHINode>(&II);
+ unsigned oldNumOperands = Phi->getNumIncomingValues();
+ // Add the incoming values from the remainder code to the end of the phi
+ // node.
+      for (unsigned i = 0; i < oldNumOperands; i++) {
+ Value *newVal = VMap[Phi->getIncomingValue(i)];
+ if (!newVal) {
+ assert(isa<Constant>(Phi->getIncomingValue(i)) &&
+ "VMap should exist for all values except constants!");
+ newVal = Phi->getIncomingValue(i);
+ }
+ Phi->addIncoming(newVal,
+ cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
+ }
+ }
+ // Update the dominator info because the immediate dominator is no longer the
+  // header of the original Loop. BB now has edges from both L and the
+  // remainder code. Since the preheader determines which loop is run (either
+  // L or a direct jump to the remainder code), we set the immediate dominator
+  // to the preheader.
+ if (DT)
+ DT->changeImmediateDominator(BB, PreHeader);
+ }
+
// Loop structure should be the following:
// Epilog Prolog
//
@@ -721,6 +790,19 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
+  // Canonicalize both the original and the remainder loops to
+  // LoopSimplifyForm. We cannot rely on the LoopUnrollPass to do this because
+  // it only does canonicalization for parent/subloops and not the sibling
+  // loops.
+  if (!OtherExits.empty()) {
+ // Generate dedicated exit blocks for the original loop, to preserve
+ // LoopSimplifyForm.
+ formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+ // Generate dedicated exit blocks for the remainder loop if one exists, to
+ // preserve LoopSimplifyForm.
+ if (remainderLoop)
+ formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
+ }
+
NumRuntimeUnrolled++;
return true;
}
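
The hunks above are easier to follow with a source-level picture of what runtime unrolling with an epilog remainder produces. The C++ sketch below is purely illustrative (the function name saxpy and the unroll factor of 4 are assumptions, not taken from the patch): the main loop runs a multiple of the unroll factor, and the remainder loop picks up the leftover iterations, i.e. the loop that CloneLoopBlocks materializes and connectEpilog wires up to LatchExit.

void saxpy(float *x, const float *y, float a, unsigned n) {
  unsigned i = 0;
  // Unrolled main loop: executes (n / 4) * 4 iterations.
  for (; i + 4 <= n; i += 4) {
    x[i]     += a * y[i];
    x[i + 1] += a * y[i + 1];
    x[i + 2] += a * y[i + 2];
    x[i + 3] += a * y[i + 3];
  }
  // Epilog remainder loop: executes the remaining n % 4 iterations.
  for (; i < n; ++i)
    x[i] += a * y[i];
}
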
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 0ed33945ef40..58b70be95d99 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -528,8 +528,9 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
return false;
}
-bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- DominatorTree *DT) {
+bool RecurrenceDescriptor::isFirstOrderRecurrence(
+ PHINode *Phi, Loop *TheLoop,
+ DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
// Ensure the phi node is in the loop header and has two incoming values.
if (Phi->getParent() != TheLoop->getHeader() ||
@@ -551,12 +552,24 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
// Get the previous value. The previous value comes from the latch edge while
// the initial value comes form the preheader edge.
auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
- if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
+ SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
return false;
// Ensure every user of the phi node is dominated by the previous value.
// The dominance requirement ensures the loop vectorizer will not need to
// vectorize the initial value prior to the first iteration of the loop.
+ // TODO: Consider extending this sinking to handle other kinds of instructions
+ // and expressions, beyond sinking a single cast past Previous.
+ if (Phi->hasOneUse()) {
+ auto *I = Phi->user_back();
+ if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
+ DT->dominates(Previous, I->user_back())) {
+ SinkAfter[I] = Previous;
+ return true;
+ }
+ }
+
for (User *U : Phi->users())
if (auto *I = dyn_cast<Instruction>(U)) {
if (!DT->dominates(Previous, I))
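
For readers less familiar with the first-order-recurrence handling above, here is an assumed source-level example of the new case the SinkAfter map captures (the names f, a, prev, and widened are invented for illustration): the recurrence phi has a single user, a cast in the loop header, and that cast's single user is dominated by the recurrence's previous value, so the cast can legally be sunk to just after Previous instead of the recurrence being rejected.

int f(const short *a, int n) {
  int sum = 0;
  short prev = 0;            // first-order recurrence phi
  for (int i = 0; i < n; ++i) {
    int widened = prev;      // the phi's single user: a sign-extending cast
    prev = a[i];             // 'Previous': the value carried to the next iteration
    sum += widened;          // the cast's single user; dominated by 'Previous'
  }
  return sum;
}
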
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 0a51f9a0e4a2..1c2a60a6b8b2 100644
--- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -27,7 +27,6 @@ void llvm::createMemCpyLoop(Instruction *InsertBefore,
BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop",
F, NewBB);
- OrigBB->getTerminator()->setSuccessor(0, LoopBB);
IRBuilder<> Builder(OrigBB->getTerminator());
// SrcAddr and DstAddr are expected to be pointer types,
@@ -39,6 +38,11 @@ void llvm::createMemCpyLoop(Instruction *InsertBefore,
SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
+ Builder.CreateCondBr(
+ Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
+ LoopBB);
+ OrigBB->getTerminator()->eraseFromParent();
+
IRBuilder<> LoopBuilder(LoopBB);
PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
@@ -167,6 +171,7 @@ static void createMemMoveLoop(Instruction *InsertBefore,
static void createMemSetLoop(Instruction *InsertBefore,
Value *DstAddr, Value *CopyLen, Value *SetValue,
unsigned Align, bool IsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
BasicBlock *OrigBB = InsertBefore->getParent();
Function *F = OrigBB->getParent();
BasicBlock *NewBB =
@@ -174,7 +179,6 @@ static void createMemSetLoop(Instruction *InsertBefore,
BasicBlock *LoopBB
= BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);
- OrigBB->getTerminator()->setSuccessor(0, LoopBB);
IRBuilder<> Builder(OrigBB->getTerminator());
// Cast pointer to the type of value getting stored
@@ -182,9 +186,14 @@ static void createMemSetLoop(Instruction *InsertBefore,
DstAddr = Builder.CreateBitCast(DstAddr,
PointerType::get(SetValue->getType(), dstAS));
+ Builder.CreateCondBr(
+ Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
+ LoopBB);
+ OrigBB->getTerminator()->eraseFromParent();
+
IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0);
- LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
LoopBuilder.CreateStore(
SetValue,
@@ -192,7 +201,7 @@ static void createMemSetLoop(Instruction *InsertBefore,
IsVolatile);
Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1));
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
LoopIndex->addIncoming(NewIndex, LoopBB);
LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
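
To make the intent of the two hunks above concrete, here is a rough C-level rendering (an assumed illustration, not code from the patch) of the control flow createMemSetLoop now emits: the new compare-and-branch in OrigBB skips the do-while-shaped "loadstoreloop" entirely when the length is zero, which the old unconditional setSuccessor(0, LoopBB) edge could not do.

void lowered_memset(unsigned char *dst, unsigned char value, unsigned long len) {
  if (len != 0) {            // new guard: CreateICmpEQ(0, CopyLen) branches around the loop
    unsigned long i = 0;     // LoopIndex phi, seeded with 0 from OrigBB
    do {
      dst[i] = value;        // the (possibly volatile) store of SetValue
      ++i;                   // NewIndex = LoopIndex + 1
    } while (i < len);       // CreateCondBr(CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB)
  }
}
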
diff --git a/lib/Transforms/Utils/OrderedInstructions.cpp b/lib/Transforms/Utils/OrderedInstructions.cpp
index 2e67e0def5b9..dc780542ce68 100644
--- a/lib/Transforms/Utils/OrderedInstructions.cpp
+++ b/lib/Transforms/Utils/OrderedInstructions.cpp
@@ -27,7 +27,6 @@ bool OrderedInstructions::dominates(const Instruction *InstA,
if (OBB == OBBMap.end())
OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
return OBB->second->dominates(InstA, InstB);
- } else {
- return DT->dominates(InstA->getParent(), InstB->getParent());
}
+ return DT->dominates(InstA->getParent(), InstB->getParent());
}
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index 1260e35e934d..d4cdaede6b86 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -34,6 +33,7 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <algorithm>
#define DEBUG_TYPE "predicateinfo"
using namespace llvm;
@@ -106,14 +106,27 @@ struct ValueDFS {
bool EdgeOnly = false;
};
+// Perform a strict weak ordering on instructions and arguments.
+static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
+ const Value *B) {
+ auto *ArgA = dyn_cast_or_null<Argument>(A);
+ auto *ArgB = dyn_cast_or_null<Argument>(B);
+ if (ArgA && !ArgB)
+ return true;
+ if (ArgB && !ArgA)
+ return false;
+ if (ArgA && ArgB)
+ return ArgA->getArgNo() < ArgB->getArgNo();
+ return OI.dominates(cast<Instruction>(A), cast<Instruction>(B));
+}
+
// This compares ValueDFS structures, creating OrderedBasicBlocks where
// necessary to compare uses/defs in the same block. Doing so allows us to walk
// the minimum number of instructions necessary to compute our def/use ordering.
struct ValueDFS_Compare {
- DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> &OBBMap;
- ValueDFS_Compare(
- DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> &OBBMap)
- : OBBMap(OBBMap) {}
+ OrderedInstructions &OI;
+ ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {}
+
bool operator()(const ValueDFS &A, const ValueDFS &B) const {
if (&A == &B)
return false;
@@ -196,23 +209,12 @@ struct ValueDFS_Compare {
auto *ArgA = dyn_cast_or_null<Argument>(ADef);
auto *ArgB = dyn_cast_or_null<Argument>(BDef);
- if (ArgA && !ArgB)
- return true;
- if (ArgB && !ArgA)
- return false;
- if (ArgA && ArgB)
- return ArgA->getArgNo() < ArgB->getArgNo();
+ if (ArgA || ArgB)
+ return valueComesBefore(OI, ArgA, ArgB);
auto *AInst = getDefOrUser(ADef, A.U);
auto *BInst = getDefOrUser(BDef, B.U);
-
- auto *BB = AInst->getParent();
- auto LookupResult = OBBMap.find(BB);
- if (LookupResult != OBBMap.end())
- return LookupResult->second->dominates(AInst, BInst);
-
- auto Result = OBBMap.insert({BB, make_unique<OrderedBasicBlock>(BB)});
- return Result.first->second->dominates(AInst, BInst);
+ return valueComesBefore(OI, AInst, BInst);
}
};
@@ -547,38 +549,11 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
// Sort OpsToRename since we are going to iterate it.
SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end());
- std::sort(OpsToRename.begin(), OpsToRename.end(), [&](const Value *A,
- const Value *B) {
- auto *ArgA = dyn_cast_or_null<Argument>(A);
- auto *ArgB = dyn_cast_or_null<Argument>(B);
-
- // If A and B are args, order them based on their arg no.
- if (ArgA && !ArgB)
- return true;
- if (ArgB && !ArgA)
- return false;
- if (ArgA && ArgB)
- return ArgA->getArgNo() < ArgB->getArgNo();
-
- // Else, A are B are instructions.
- // If they belong to different BBs, order them by the dominance of BBs.
- auto *AInst = cast<Instruction>(A);
- auto *BInst = cast<Instruction>(B);
- if (AInst->getParent() != BInst->getParent())
- return DT.dominates(AInst->getParent(), BInst->getParent());
-
- // Else, A and B belong to the same BB.
- // Order A and B by their dominance.
- auto *BB = AInst->getParent();
- auto LookupResult = OBBMap.find(BB);
- if (LookupResult != OBBMap.end())
- return LookupResult->second->dominates(AInst, BInst);
-
- auto Result = OBBMap.insert({BB, make_unique<OrderedBasicBlock>(BB)});
- return Result.first->second->dominates(AInst, BInst);
- });
-
- ValueDFS_Compare Compare(OBBMap);
+ auto Comparator = [&](const Value *A, const Value *B) {
+ return valueComesBefore(OI, A, B);
+ };
+ std::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);
+ ValueDFS_Compare Compare(OI);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
unsigned Counter = 0;
@@ -715,7 +690,7 @@ PredicateInfo::getValueInfo(Value *Operand) const {
PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
AssumptionCache &AC)
- : F(F), DT(DT), AC(AC) {
+ : F(F), DT(DT), AC(AC), OI(&DT) {
// Push an empty operand info so that we can detect 0 as not finding one
ValueInfos.resize(1);
buildPredicateInfo();
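
The refactoring above replaces the per-block OrderedBasicBlock map with a single OrderedInstructions helper and funnels every comparison through valueComesBefore. The following tiny self-contained model (invented types, meant only to mirror the ordering rules rather than the LLVM classes) shows the resulting strict weak ordering: arguments sort before instructions, arguments sort by argument number, and instructions sort by their position in the function.

#include <algorithm>
#include <vector>

struct ModelValue {
  bool IsArg;          // true for a function argument, false for an instruction
  unsigned ArgNo;      // meaningful only when IsArg is true
  unsigned InstPos;    // program-order position, meaningful only when IsArg is false
};

static bool modelComesBefore(const ModelValue &A, const ModelValue &B) {
  if (A.IsArg && !B.IsArg) return true;            // arguments precede instructions
  if (B.IsArg && !A.IsArg) return false;
  if (A.IsArg && B.IsArg) return A.ArgNo < B.ArgNo;
  return A.InstPos < B.InstPos;                    // stand-in for OrderedInstructions::dominates
}

// Usage mirroring renameUses(): sort a mixed worklist into a deterministic order.
void sortWorklist(std::vector<ModelValue> &Ops) {
  std::sort(Ops.begin(), Ops.end(), modelComesBefore);
}
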
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 0970c436e665..e724b0a28c32 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -4781,7 +4781,7 @@ public:
SwitchLookupTable(
Module &M, uint64_t TableSize, ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
- Constant *DefaultValue, const DataLayout &DL);
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
/// Build instructions with Builder to retrieve the value at
/// the position given by Index in the lookup table.
@@ -4835,7 +4835,7 @@ private:
SwitchLookupTable::SwitchLookupTable(
Module &M, uint64_t TableSize, ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
- Constant *DefaultValue, const DataLayout &DL)
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
: SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) {
assert(Values.size() && "Can't build lookup table without values!");
@@ -4943,7 +4943,7 @@ SwitchLookupTable::SwitchLookupTable(
Array = new GlobalVariable(M, ArrayTy, /*constant=*/true,
GlobalVariable::PrivateLinkage, Initializer,
- "switch.table");
+ "switch.table." + FuncName);
Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
Kind = ArrayKind;
}
@@ -5333,7 +5333,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If using a bitmask, use any value to fill the lookup table holes.
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
- SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL);
+ StringRef FuncName = SI->getParent()->getParent()->getName();
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
+ FuncName);
Value *Result = Table.BuildLookup(TableIndex, Builder);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index faa14046b1e3..ec8b0d426265 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -354,7 +354,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
- const SCEV *, Type *);
+ const SCEV *, Type *, unsigned);
OperationFunctionTy Operation;
ExtensionFunctionTy Extension;
@@ -406,11 +406,11 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
const SCEV *A =
- (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0u),
- WideTy);
+ (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0),
+ WideTy, 0);
const SCEV *B =
- (SE->*Operation)((SE->*Extension)(LHS, WideTy),
- (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap, 0u);
+ (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0),
+ (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0);
if (A != B)
return false;
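
The A != B comparison above is the symbolic, SCEV-level version of a simple arithmetic fact: a narrow operation cannot overflow exactly when extending the operands first and then operating gives the same wide result as operating first and then extending. A small standalone check over concrete 8-bit values follows (purely illustrative; the SCEV code proves the same identity symbolically for every value the loop can produce, and relies on two's-complement wrapping for the narrow add).

#include <cstdint>
#include <cstdio>

// Returns true when the signed i8 add a + b wraps, detected by comparing
// "extend then add" against "add then extend", as in eliminateOverflowIntrinsic.
static bool addOverflowsI8(int8_t a, int8_t b) {
  int16_t extendThenAdd = int16_t(a) + int16_t(b);          // B: operate on extended operands
  int16_t addThenExtend = int16_t(static_cast<int8_t>(a + b)); // A: extend the narrow result
  return extendThenAdd != addThenExtend;                    // A != B  =>  overflow possible
}

int main() {
  std::printf("%d\n", addOverflowsI8(100, 50));   // prints 1: 150 does not fit in i8
  std::printf("%d\n", addOverflowsI8(100, 20));   // prints 0: 120 fits in i8
  return 0;
}
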
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
deleted file mode 100644
index 78453aaa16ce..000000000000
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ /dev/null
@@ -1,3282 +0,0 @@
-//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a basic-block vectorization pass. The algorithm was
-// inspired by that used by the Vienna MAP Vectorizor by Franchetti and Kral,
-// et al. It works by looking for chains of pairable operations and then
-// pairing them.
-//
-//===----------------------------------------------------------------------===//
-
-#define BBV_NAME "bb-vectorize"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Vectorize.h"
-#include <algorithm>
-using namespace llvm;
-
-#define DEBUG_TYPE BBV_NAME
-
-static cl::opt<bool>
-IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false),
- cl::Hidden, cl::desc("Ignore target information"));
-
-static cl::opt<unsigned>
-ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
- cl::desc("The required chain depth for vectorization"));
-
-static cl::opt<bool>
-UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false),
- cl::Hidden, cl::desc("Use the chain depth requirement with"
- " target information"));
-
-static cl::opt<unsigned>
-SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
- cl::desc("The maximum search distance for instruction pairs"));
-
-static cl::opt<bool>
-SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden,
- cl::desc("Replicating one element to a pair breaks the chain"));
-
-static cl::opt<unsigned>
-VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden,
- cl::desc("The size of the native vector registers"));
-
-static cl::opt<unsigned>
-MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden,
- cl::desc("The maximum number of pairing iterations"));
-
-static cl::opt<bool>
-Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden,
- cl::desc("Don't try to form non-2^n-length vectors"));
-
-static cl::opt<unsigned>
-MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden,
- cl::desc("The maximum number of pairable instructions per group"));
-
-static cl::opt<unsigned>
-MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden,
- cl::desc("The maximum number of candidate instruction pairs per group"));
-
-static cl::opt<unsigned>
-MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
- cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
- " a full cycle check"));
-
-static cl::opt<bool>
-NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize boolean (i1) values"));
-
-static cl::opt<bool>
-NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize integer values"));
-
-static cl::opt<bool>
-NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize floating-point values"));
-
-// FIXME: This should default to false once pointer vector support works.
-static cl::opt<bool>
-NoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden,
- cl::desc("Don't try to vectorize pointer values"));
-
-static cl::opt<bool>
-NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize casting (conversion) operations"));
-
-static cl::opt<bool>
-NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize floating-point math intrinsics"));
-
-static cl::opt<bool>
- NoBitManipulation("bb-vectorize-no-bitmanip", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize BitManipulation intrinsics"));
-
-static cl::opt<bool>
-NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
-
-static cl::opt<bool>
-NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize select instructions"));
-
-static cl::opt<bool>
-NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize comparison instructions"));
-
-static cl::opt<bool>
-NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize getelementptr instructions"));
-
-static cl::opt<bool>
-NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize loads and stores"));
-
-static cl::opt<bool>
-AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden,
- cl::desc("Only generate aligned loads and stores"));
-
-static cl::opt<bool>
-NoMemOpBoost("bb-vectorize-no-mem-op-boost",
- cl::init(false), cl::Hidden,
- cl::desc("Don't boost the chain-depth contribution of loads and stores"));
-
-static cl::opt<bool>
-FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden,
- cl::desc("Use a fast instruction dependency analysis"));
-
-#ifndef NDEBUG
-static cl::opt<bool>
-DebugInstructionExamination("bb-vectorize-debug-instruction-examination",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, output information on the"
- " instruction-examination process"));
-static cl::opt<bool>
-DebugCandidateSelection("bb-vectorize-debug-candidate-selection",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, output information on the"
- " candidate-selection process"));
-static cl::opt<bool>
-DebugPairSelection("bb-vectorize-debug-pair-selection",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, output information on the"
- " pair-selection process"));
-static cl::opt<bool>
-DebugCycleCheck("bb-vectorize-debug-cycle-check",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, output information on the"
- " cycle-checking process"));
-
-static cl::opt<bool>
-PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, dump the basic block after"
- " every pair is fused"));
-#endif
-
-STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
-
-namespace {
- struct BBVectorize : public BasicBlockPass {
- static char ID; // Pass identification, replacement for typeid
-
- const VectorizeConfig Config;
-
- BBVectorize(const VectorizeConfig &C = VectorizeConfig())
- : BasicBlockPass(ID), Config(C) {
- initializeBBVectorizePass(*PassRegistry::getPassRegistry());
- }
-
- BBVectorize(Pass *P, Function &F, const VectorizeConfig &C)
- : BasicBlockPass(ID), Config(C) {
- AA = &P->getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &P->getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TLI = &P->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- TTI = IgnoreTargetInfo
- ? nullptr
- : &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- }
-
- typedef std::pair<Value *, Value *> ValuePair;
- typedef std::pair<ValuePair, int> ValuePairWithCost;
- typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
- typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
- typedef std::pair<VPPair, unsigned> VPPairWithType;
-
- AliasAnalysis *AA;
- DominatorTree *DT;
- ScalarEvolution *SE;
- const TargetLibraryInfo *TLI;
- const TargetTransformInfo *TTI;
-
- // FIXME: const correct?
-
- bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false);
-
- bool getCandidatePairs(BasicBlock &BB,
- BasicBlock::iterator &Start,
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts, bool NonPow2Len);
-
- // FIXME: The current implementation does not account for pairs that
- // are connected in multiple ways. For example:
- // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap)
- enum PairConnectionType {
- PairConnectionDirect,
- PairConnectionSwap,
- PairConnectionSplat
- };
-
- void computeConnectedPairs(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes);
-
- void buildDepMap(BasicBlock &BB,
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &PairableInstUsers);
-
- void choosePairs(DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *>& ChosenPairs);
-
- void fuseChosenPairs(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *>& ChosenPairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps);
-
-
- bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
-
- bool areInstsCompatible(Instruction *I, Instruction *J,
- bool IsSimpleLoadStore, bool NonPow2Len,
- int &CostSavings, int &FixedOrder);
-
- bool trackUsesOfI(DenseSet<Value *> &Users,
- AliasSetTracker &WriteSet, Instruction *I,
- Instruction *J, bool UpdateUsers = true,
- DenseSet<ValuePair> *LoadMoveSetPairs = nullptr);
-
- void computePairsConnectedTo(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- ValuePair P);
-
- bool pairsConflict(ValuePair P, ValuePair Q,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> >
- *PairableInstUserMap = nullptr,
- DenseSet<VPPair> *PairableInstUserPairSet = nullptr);
-
- bool pairWillFormCycle(ValuePair P,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
- DenseSet<ValuePair> &CurrentPairs);
-
- void pruneDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<VPPair> &PairableInstUserPairSet,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &DAG,
- DenseSet<ValuePair> &PrunedDAG, ValuePair J,
- bool UseCycleCheck);
-
- void buildInitialDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &DAG, ValuePair J);
-
- void findBestDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<VPPair> &PairableInstUserPairSet,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
- int &BestEffSize, Value *II, std::vector<Value *>&JJ,
- bool UseCycleCheck);
-
- Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o);
-
- void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
- unsigned MaskOffset, unsigned NumInElem,
- unsigned NumInElem1, unsigned IdxOffset,
- std::vector<Constant*> &Mask);
-
- Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I,
- Instruction *J);
-
- bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J,
- unsigned o, Value *&LOp, unsigned numElemL,
- Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ,
- unsigned IdxOff = 0);
-
- Value *getReplacementInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, bool IBeforeJ);
-
- void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
- Instruction *J, SmallVectorImpl<Value *> &ReplacedOperands,
- bool IBeforeJ);
-
- void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
- Instruction *J, Instruction *K,
- Instruction *&InsertionPt, Instruction *&K1,
- Instruction *&K2);
-
- void collectPairLoadMoveSet(BasicBlock &BB,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *I);
-
- void collectLoadMoveSet(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
- DenseSet<ValuePair> &LoadMoveSetPairs);
-
- bool canMoveUsesOfIAfterJ(BasicBlock &BB,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *I, Instruction *J);
-
- void moveUsesOfIAfterJ(BasicBlock &BB,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *&InsertionPt,
- Instruction *I, Instruction *J);
-
- bool vectorizeBB(BasicBlock &BB) {
- if (skipBasicBlock(BB))
- return false;
- if (!DT->isReachableFromEntry(&BB)) {
- DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() <<
- " in " << BB.getParent()->getName() << "\n");
- return false;
- }
-
- DEBUG(if (TTI) dbgs() << "BBV: using target information\n");
-
- bool changed = false;
- // Iterate a sufficient number of times to merge types of size 1 bit,
- // then 2 bits, then 4, etc. up to half of the target vector width of the
- // target vector register.
- unsigned n = 1;
- for (unsigned v = 2;
- (TTI || v <= Config.VectorBits) &&
- (!Config.MaxIter || n <= Config.MaxIter);
- v *= 2, ++n) {
- DEBUG(dbgs() << "BBV: fusing loop #" << n <<
- " for " << BB.getName() << " in " <<
- BB.getParent()->getName() << "...\n");
- if (vectorizePairs(BB))
- changed = true;
- else
- break;
- }
-
- if (changed && !Pow2LenOnly) {
- ++n;
- for (; !Config.MaxIter || n <= Config.MaxIter; ++n) {
- DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " <<
- n << " for " << BB.getName() << " in " <<
- BB.getParent()->getName() << "...\n");
- if (!vectorizePairs(BB, true)) break;
- }
- }
-
- DEBUG(dbgs() << "BBV: done!\n");
- return changed;
- }
-
- bool runOnBasicBlock(BasicBlock &BB) override {
- // OptimizeNone check deferred to vectorizeBB().
-
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- TTI = IgnoreTargetInfo
- ? nullptr
- : &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *BB.getParent());
-
- return vectorizeBB(BB);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- BasicBlockPass::getAnalysisUsage(AU);
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
- AU.setPreservesCFG();
- }
-
- static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) {
- assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() &&
- "Cannot form vector from incompatible scalar types");
- Type *STy = ElemTy->getScalarType();
-
- unsigned numElem;
- if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
- numElem = VTy->getNumElements();
- } else {
- numElem = 1;
- }
-
- if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) {
- numElem += VTy->getNumElements();
- } else {
- numElem += 1;
- }
-
- return VectorType::get(STy, numElem);
- }
-
- static inline void getInstructionTypes(Instruction *I,
- Type *&T1, Type *&T2) {
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // For stores, it is the value type, not the pointer type that matters
- // because the value is what will come from a vector register.
-
- Value *IVal = SI->getValueOperand();
- T1 = IVal->getType();
- } else {
- T1 = I->getType();
- }
-
- if (CastInst *CI = dyn_cast<CastInst>(I))
- T2 = CI->getSrcTy();
- else
- T2 = T1;
-
- if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
- T2 = SI->getCondition()->getType();
- } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
- T2 = SI->getOperand(0)->getType();
- } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
- T2 = CI->getOperand(0)->getType();
- }
- }
-
- // Returns the weight associated with the provided value. A chain of
- // candidate pairs has a length given by the sum of the weights of its
- // members (one weight per pair; the weight of each member of the pair
- // is assumed to be the same). This length is then compared to the
- // chain-length threshold to determine if a given chain is significant
- // enough to be vectorized. The length is also used in comparing
- // candidate chains where longer chains are considered to be better.
- // Note: when this function returns 0, the resulting instructions are
- // not actually fused.
- inline size_t getDepthFactor(Value *V) {
- // InsertElement and ExtractElement have a depth factor of zero. This is
- // for two reasons: First, they cannot be usefully fused. Second, because
- // the pass generates a lot of these, they can confuse the simple metric
- // used to compare the dags in the next iteration. Thus, giving them a
- // weight of zero allows the pass to essentially ignore them in
- // subsequent iterations when looking for vectorization opportunities
- // while still tracking dependency chains that flow through those
- // instructions.
- if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
- return 0;
-
- // Give a load or store half of the required depth so that load/store
- // pairs will vectorize.
- if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
- return Config.ReqChainDepth/2;
-
- return 1;
- }
-
- // Returns the cost of the provided instruction using TTI.
- // This does not handle loads and stores.
- unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2,
- TargetTransformInfo::OperandValueKind Op1VK =
- TargetTransformInfo::OK_AnyValue,
- TargetTransformInfo::OperandValueKind Op2VK =
- TargetTransformInfo::OK_AnyValue,
- const Instruction *I = nullptr) {
- switch (Opcode) {
- default: break;
- case Instruction::GetElementPtr:
- // We mark this instruction as zero-cost because scalar GEPs are usually
- // lowered to the instruction addressing mode. At the moment we don't
- // generate vector GEPs.
- return 0;
- case Instruction::Br:
- return TTI->getCFInstrCost(Opcode);
- case Instruction::PHI:
- return 0;
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- return TTI->getArithmeticInstrCost(Opcode, T1, Op1VK, Op2VK);
- case Instruction::Select:
- case Instruction::ICmp:
- case Instruction::FCmp:
- return TTI->getCmpSelInstrCost(Opcode, T1, T2, I);
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast:
- case Instruction::ShuffleVector:
- return TTI->getCastInstrCost(Opcode, T1, T2, I);
- }
-
- return 1;
- }
-
- // This determines the relative offset of two loads or stores, returning
- // true if the offset could be determined to be some constant value.
- // For example, if OffsetInElmts == 1, then J accesses the memory directly
- // after I; if OffsetInElmts == -1 then I accesses the memory
- // directly after J.
- bool getPairPtrInfo(Instruction *I, Instruction *J,
- Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
- unsigned &IAddressSpace, unsigned &JAddressSpace,
- int64_t &OffsetInElmts, bool ComputeOffset = true) {
- OffsetInElmts = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- LoadInst *LJ = cast<LoadInst>(J);
- IPtr = LI->getPointerOperand();
- JPtr = LJ->getPointerOperand();
- IAlignment = LI->getAlignment();
- JAlignment = LJ->getAlignment();
- IAddressSpace = LI->getPointerAddressSpace();
- JAddressSpace = LJ->getPointerAddressSpace();
- } else {
- StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J);
- IPtr = SI->getPointerOperand();
- JPtr = SJ->getPointerOperand();
- IAlignment = SI->getAlignment();
- JAlignment = SJ->getAlignment();
- IAddressSpace = SI->getPointerAddressSpace();
- JAddressSpace = SJ->getPointerAddressSpace();
- }
-
- if (!ComputeOffset)
- return true;
-
- const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
- const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
-
- // If this is a trivial offset, then we'll get something like
- // 1*sizeof(type). With target data, which we need anyway, this will get
- // constant folded into a number.
- const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
- if (const SCEVConstant *ConstOffSCEV =
- dyn_cast<SCEVConstant>(OffsetSCEV)) {
- ConstantInt *IntOff = ConstOffSCEV->getValue();
- int64_t Offset = IntOff->getSExtValue();
- const DataLayout &DL = I->getModule()->getDataLayout();
- Type *VTy = IPtr->getType()->getPointerElementType();
- int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy);
-
- Type *VTy2 = JPtr->getType()->getPointerElementType();
- if (VTy != VTy2 && Offset < 0) {
- int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2);
- OffsetInElmts = Offset/VTy2TSS;
- return (std::abs(Offset) % VTy2TSS) == 0;
- }
-
- OffsetInElmts = Offset/VTyTSS;
- return (std::abs(Offset) % VTyTSS) == 0;
- }
-
- return false;
- }
-
- // Returns true if the provided CallInst represents an intrinsic that can
- // be vectorized.
- bool isVectorizableIntrinsic(CallInst* I) {
- Function *F = I->getCalledFunction();
- if (!F) return false;
-
- Intrinsic::ID IID = F->getIntrinsicID();
- if (!IID) return false;
-
- switch(IID) {
- default:
- return false;
- case Intrinsic::sqrt:
- case Intrinsic::powi:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::pow:
- case Intrinsic::round:
- case Intrinsic::copysign:
- case Intrinsic::ceil:
- case Intrinsic::nearbyint:
- case Intrinsic::rint:
- case Intrinsic::trunc:
- case Intrinsic::floor:
- case Intrinsic::fabs:
- case Intrinsic::minnum:
- case Intrinsic::maxnum:
- return Config.VectorizeMath;
- case Intrinsic::bswap:
- case Intrinsic::ctpop:
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
- return Config.VectorizeBitManipulations;
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- return Config.VectorizeFMA;
- }
- }
-
- bool isPureIEChain(InsertElementInst *IE) {
- InsertElementInst *IENext = IE;
- do {
- if (!isa<UndefValue>(IENext->getOperand(0)) &&
- !isa<InsertElementInst>(IENext->getOperand(0))) {
- return false;
- }
- } while ((IENext =
- dyn_cast<InsertElementInst>(IENext->getOperand(0))));
-
- return true;
- }
- };
-
- // This function implements one vectorization iteration on the provided
- // basic block. It returns true if the block is changed.
- bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) {
- bool ShouldContinue;
- BasicBlock::iterator Start = BB.getFirstInsertionPt();
-
- std::vector<Value *> AllPairableInsts;
- DenseMap<Value *, Value *> AllChosenPairs;
- DenseSet<ValuePair> AllFixedOrderPairs;
- DenseMap<VPPair, unsigned> AllPairConnectionTypes;
- DenseMap<ValuePair, std::vector<ValuePair> > AllConnectedPairs,
- AllConnectedPairDeps;
-
- do {
- std::vector<Value *> PairableInsts;
- DenseMap<Value *, std::vector<Value *> > CandidatePairs;
- DenseSet<ValuePair> FixedOrderPairs;
- DenseMap<ValuePair, int> CandidatePairCostSavings;
- ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
- FixedOrderPairs,
- CandidatePairCostSavings,
- PairableInsts, NonPow2Len);
- if (PairableInsts.empty()) continue;
-
- // Build the candidate pair set for faster lookups.
- DenseSet<ValuePair> CandidatePairsSet;
- for (DenseMap<Value *, std::vector<Value *> >::iterator I =
- CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I)
- for (std::vector<Value *>::iterator J = I->second.begin(),
- JE = I->second.end(); J != JE; ++J)
- CandidatePairsSet.insert(ValuePair(I->first, *J));
-
- // Now we have a map of all of the pairable instructions and we need to
- // select the best possible pairing. A good pairing is one such that the
- // users of the pair are also paired. This defines a (directed) forest
- // over the pairs such that two pairs are connected iff the second pair
- // uses the first.
-
- // Note that it only matters that both members of the second pair use some
- // element of the first pair (to allow for splatting).
-
- DenseMap<ValuePair, std::vector<ValuePair> > ConnectedPairs,
- ConnectedPairDeps;
- DenseMap<VPPair, unsigned> PairConnectionTypes;
- computeConnectedPairs(CandidatePairs, CandidatePairsSet,
- PairableInsts, ConnectedPairs, PairConnectionTypes);
- if (ConnectedPairs.empty()) continue;
-
- for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
- I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
- I != IE; ++I)
- for (std::vector<ValuePair>::iterator J = I->second.begin(),
- JE = I->second.end(); J != JE; ++J)
- ConnectedPairDeps[*J].push_back(I->first);
-
- // Build the pairable-instruction dependency map
- DenseSet<ValuePair> PairableInstUsers;
- buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
-
- // There is now a graph of the connected pairs. For each variable, pick
- // the pairing with the largest dag meeting the depth requirement on at
- // least one branch. Then select all pairings that are part of that dag
- // and remove them from the list of available pairings and pairable
- // variables.
-
- DenseMap<Value *, Value *> ChosenPairs;
- choosePairs(CandidatePairs, CandidatePairsSet,
- CandidatePairCostSavings,
- PairableInsts, FixedOrderPairs, PairConnectionTypes,
- ConnectedPairs, ConnectedPairDeps,
- PairableInstUsers, ChosenPairs);
-
- if (ChosenPairs.empty()) continue;
- AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
- PairableInsts.end());
- AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end());
-
- // Only for the chosen pairs, propagate information on fixed-order pairs,
- // pair connections, and their types to the data structures used by the
- // pair fusion procedures.
- for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(),
- IE = ChosenPairs.end(); I != IE; ++I) {
- if (FixedOrderPairs.count(*I))
- AllFixedOrderPairs.insert(*I);
- else if (FixedOrderPairs.count(ValuePair(I->second, I->first)))
- AllFixedOrderPairs.insert(ValuePair(I->second, I->first));
-
- for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin();
- J != IE; ++J) {
- DenseMap<VPPair, unsigned>::iterator K =
- PairConnectionTypes.find(VPPair(*I, *J));
- if (K != PairConnectionTypes.end()) {
- AllPairConnectionTypes.insert(*K);
- } else {
- K = PairConnectionTypes.find(VPPair(*J, *I));
- if (K != PairConnectionTypes.end())
- AllPairConnectionTypes.insert(*K);
- }
- }
- }
-
- for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
- I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
- I != IE; ++I)
- for (std::vector<ValuePair>::iterator J = I->second.begin(),
- JE = I->second.end(); J != JE; ++J)
- if (AllPairConnectionTypes.count(VPPair(I->first, *J))) {
- AllConnectedPairs[I->first].push_back(*J);
- AllConnectedPairDeps[*J].push_back(I->first);
- }
- } while (ShouldContinue);
-
- if (AllChosenPairs.empty()) return false;
- NumFusedOps += AllChosenPairs.size();
-
- // A set of pairs has now been selected. It is now necessary to replace the
- // paired instructions with vector instructions. For this procedure each
- // operand must be replaced with a vector operand. This vector is formed
- // by using build_vector on the old operands. The replaced values are then
- // replaced with a vector_extract on the result. Subsequent optimization
- // passes should coalesce the build/extract combinations.
-
- fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs,
- AllPairConnectionTypes,
- AllConnectedPairs, AllConnectedPairDeps);
-
- // It is important to cleanup here so that future iterations of this
- // function have less work to do.
- (void)SimplifyInstructionsInBlock(&BB, TLI);
- return true;
- }
-
- // This function returns true if the provided instruction is capable of being
- // fused into a vector instruction. This determination is based only on the
- // type and other attributes of the instruction.
- bool BBVectorize::isInstVectorizable(Instruction *I,
- bool &IsSimpleLoadStore) {
- IsSimpleLoadStore = false;
-
- if (CallInst *C = dyn_cast<CallInst>(I)) {
- if (!isVectorizableIntrinsic(C))
- return false;
- } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
- // Vectorize simple loads if possbile:
- IsSimpleLoadStore = L->isSimple();
- if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
- return false;
- } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
- // Vectorize simple stores if possbile:
- IsSimpleLoadStore = S->isSimple();
- if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
- return false;
- } else if (CastInst *C = dyn_cast<CastInst>(I)) {
- // We can vectorize casts, but not casts of pointer types, etc.
- if (!Config.VectorizeCasts)
- return false;
-
- Type *SrcTy = C->getSrcTy();
- if (!SrcTy->isSingleValueType())
- return false;
-
- Type *DestTy = C->getDestTy();
- if (!DestTy->isSingleValueType())
- return false;
- } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
- if (!Config.VectorizeSelect)
- return false;
- // We can vectorize a select if either all operands are scalars,
- // or all operands are vectors. Trying to "widen" a select between
- // vectors that has a scalar condition results in a malformed select.
- // FIXME: We could probably be smarter about this by rewriting the select
- // with different types instead.
- return (SI->getCondition()->getType()->isVectorTy() ==
- SI->getTrueValue()->getType()->isVectorTy());
- } else if (isa<CmpInst>(I)) {
- if (!Config.VectorizeCmp)
- return false;
- } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) {
- if (!Config.VectorizeGEP)
- return false;
-
- // Currently, vector GEPs exist only with one index.
- if (G->getNumIndices() != 1)
- return false;
- } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) ||
- isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) {
- return false;
- }
-
- Type *T1, *T2;
- getInstructionTypes(I, T1, T2);
-
- // Not every type can be vectorized...
- if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) ||
- !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
- return false;
-
- if (T1->getScalarSizeInBits() == 1) {
- if (!Config.VectorizeBools)
- return false;
- } else {
- if (!Config.VectorizeInts && T1->isIntOrIntVectorTy())
- return false;
- }
-
- if (T2->getScalarSizeInBits() == 1) {
- if (!Config.VectorizeBools)
- return false;
- } else {
- if (!Config.VectorizeInts && T2->isIntOrIntVectorTy())
- return false;
- }
-
- if (!Config.VectorizeFloats
- && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
- return false;
-
- // Don't vectorize target-specific types.
- if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy())
- return false;
- if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
- return false;
-
- if (!Config.VectorizePointers && (T1->getScalarType()->isPointerTy() ||
- T2->getScalarType()->isPointerTy()))
- return false;
-
- if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
- T2->getPrimitiveSizeInBits() >= Config.VectorBits))
- return false;
-
- return true;
- }
-
- // This function returns true if the two provided instructions are compatible
- // (meaning that they can be fused into a vector instruction). This assumes
- // that I has already been determined to be vectorizable and that J is not
- // in the use dag of I.
- bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
- bool IsSimpleLoadStore, bool NonPow2Len,
- int &CostSavings, int &FixedOrder) {
- DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
- " <-> " << *J << "\n");
-
- CostSavings = 0;
- FixedOrder = 0;
-
- // Loads and stores can be merged if they have different alignments,
- // but are otherwise the same.
- if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment |
- (NonPow2Len ? Instruction::CompareUsingScalarTypes : 0)))
- return false;
-
- Type *IT1, *IT2, *JT1, *JT2;
- getInstructionTypes(I, IT1, IT2);
- getInstructionTypes(J, JT1, JT2);
- unsigned MaxTypeBits = std::max(
- IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(),
- IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits());
- if (!TTI && MaxTypeBits > Config.VectorBits)
- return false;
-
- // FIXME: handle addsub-type operations!
-
- if (IsSimpleLoadStore) {
- Value *IPtr, *JPtr;
- unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
- int64_t OffsetInElmts = 0;
- if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
- IAddressSpace, JAddressSpace, OffsetInElmts) &&
- std::abs(OffsetInElmts) == 1) {
- FixedOrder = (int) OffsetInElmts;
- unsigned BottomAlignment = IAlignment;
- if (OffsetInElmts < 0) BottomAlignment = JAlignment;
-
- Type *aTypeI = isa<StoreInst>(I) ?
- cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
- Type *aTypeJ = isa<StoreInst>(J) ?
- cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
- Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
-
- if (Config.AlignedOnly) {
- // An aligned load or store is possible only if the instruction
- // with the lower offset has an alignment suitable for the
- // vector type.
- const DataLayout &DL = I->getModule()->getDataLayout();
- unsigned VecAlignment = DL.getPrefTypeAlignment(VType);
- if (BottomAlignment < VecAlignment)
- return false;
- }
-
- if (TTI) {
- unsigned ICost = TTI->getMemoryOpCost(I->getOpcode(), aTypeI,
- IAlignment, IAddressSpace);
- unsigned JCost = TTI->getMemoryOpCost(J->getOpcode(), aTypeJ,
- JAlignment, JAddressSpace);
- unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType,
- BottomAlignment,
- IAddressSpace);
-
- ICost += TTI->getAddressComputationCost(aTypeI);
- JCost += TTI->getAddressComputationCost(aTypeJ);
- VCost += TTI->getAddressComputationCost(VType);
-
- if (VCost > ICost + JCost)
- return false;
-
- // We don't want to fuse to a type that will be split, even
- // if the two input types will also be split and there is no other
- // associated cost.
- unsigned VParts = TTI->getNumberOfParts(VType);
- if (VParts > 1)
- return false;
- else if (!VParts && VCost == ICost + JCost)
- return false;
-
- CostSavings = ICost + JCost - VCost;
- }
- } else {
- return false;
- }
- } else if (TTI) {
- TargetTransformInfo::OperandValueKind Op1VK =
- TargetTransformInfo::OK_AnyValue;
- TargetTransformInfo::OperandValueKind Op2VK =
- TargetTransformInfo::OK_AnyValue;
- unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2, Op1VK, Op2VK, I);
- unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2, Op1VK, Op2VK, J);
- Type *VT1 = getVecTypeForPair(IT1, JT1),
- *VT2 = getVecTypeForPair(IT2, JT2);
-
- // On some targets (example X86) the cost of a vector shift may vary
- // depending on whether the second operand is a Uniform or
- // NonUniform Constant.
- switch (I->getOpcode()) {
- default : break;
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
-
- // If both I and J are scalar shifts by constant, then the
- // merged vector shift count would be either a constant splat value
- // or a non-uniform vector of constants.
- if (ConstantInt *CII = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (ConstantInt *CIJ = dyn_cast<ConstantInt>(J->getOperand(1)))
- Op2VK = CII == CIJ ? TargetTransformInfo::OK_UniformConstantValue :
- TargetTransformInfo::OK_NonUniformConstantValue;
- } else {
- // Check for a splat of a constant or for a non uniform vector
- // of constants.
- Value *IOp = I->getOperand(1);
- Value *JOp = J->getOperand(1);
- if ((isa<ConstantVector>(IOp) || isa<ConstantDataVector>(IOp)) &&
- (isa<ConstantVector>(JOp) || isa<ConstantDataVector>(JOp))) {
- Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
- Constant *SplatValue = cast<Constant>(IOp)->getSplatValue();
- if (SplatValue != nullptr &&
- SplatValue == cast<Constant>(JOp)->getSplatValue())
- Op2VK = TargetTransformInfo::OK_UniformConstantValue;
- }
- }
- }
-
- // Note that this procedure is incorrect for insert and extract element
- // instructions (because combining these often results in a shuffle),
- // but this cost is ignored (because insert and extract element
- // instructions are assigned a zero depth factor and are not really
- // fused in general).
- unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2, Op1VK, Op2VK, I);
-
- if (VCost > ICost + JCost)
- return false;
-
- // We don't want to fuse to a type that will be split, even
- // if the two input types will also be split and there is no other
- // associated cost.
- unsigned VParts1 = TTI->getNumberOfParts(VT1),
- VParts2 = TTI->getNumberOfParts(VT2);
- if (VParts1 > 1 || VParts2 > 1)
- return false;
- else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
- return false;
-
- CostSavings = ICost + JCost - VCost;
- }
-
- // The powi,ctlz,cttz intrinsics are special because only the first
- // argument is vectorized, the second arguments must be equal.
- CallInst *CI = dyn_cast<CallInst>(I);
- Function *FI;
- if (CI && (FI = CI->getCalledFunction())) {
- Intrinsic::ID IID = FI->getIntrinsicID();
- if (IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
- IID == Intrinsic::cttz) {
- Value *A1I = CI->getArgOperand(1),
- *A1J = cast<CallInst>(J)->getArgOperand(1);
- const SCEV *A1ISCEV = SE->getSCEV(A1I),
- *A1JSCEV = SE->getSCEV(A1J);
- return (A1ISCEV == A1JSCEV);
- }
-
- if (IID && TTI) {
- FastMathFlags FMFCI;
- if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
- FMFCI = FPMOCI->getFastMathFlags();
- SmallVector<Value *, 4> IArgs(CI->arg_operands());
- unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, IArgs, FMFCI);
-
- CallInst *CJ = cast<CallInst>(J);
-
- FastMathFlags FMFCJ;
- if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
- FMFCJ = FPMOCJ->getFastMathFlags();
-
- SmallVector<Value *, 4> JArgs(CJ->arg_operands());
- unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, JArgs, FMFCJ);
-
- assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
- "Intrinsic argument counts differ");
- SmallVector<Type*, 4> Tys;
- SmallVector<Value *, 4> VecArgs;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
- IID == Intrinsic::cttz) && i == 1) {
- Tys.push_back(CI->getArgOperand(i)->getType());
- VecArgs.push_back(CI->getArgOperand(i));
- }
- else {
- Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
- CJ->getArgOperand(i)->getType()));
- // Add both operands, and then count their scalarization overhead
- // with VF 1.
- VecArgs.push_back(CI->getArgOperand(i));
- VecArgs.push_back(CJ->getArgOperand(i));
- }
- }
-
- // Compute the scalarization cost here with the original operands (to
- // check for uniqueness etc), and then call getIntrinsicInstrCost()
- // with the constructed vector types.
- Type *RetTy = getVecTypeForPair(IT1, JT1);
- unsigned ScalarizationCost = 0;
- if (!RetTy->isVoidTy())
- ScalarizationCost += TTI->getScalarizationOverhead(RetTy, true, false);
- ScalarizationCost += TTI->getOperandsScalarizationOverhead(VecArgs, 1);
-
- FastMathFlags FMFV = FMFCI;
- FMFV &= FMFCJ;
- unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV,
- ScalarizationCost);
-
- if (VCost > ICost + JCost)
- return false;
-
- // We don't want to fuse to a type that will be split, even
- // if the two input types will also be split and there is no other
- // associated cost.
- unsigned RetParts = TTI->getNumberOfParts(RetTy);
- if (RetParts > 1)
- return false;
- else if (!RetParts && VCost == ICost + JCost)
- return false;
-
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if (!Tys[i]->isVectorTy())
- continue;
-
- unsigned NumParts = TTI->getNumberOfParts(Tys[i]);
- if (NumParts > 1)
- return false;
- else if (!NumParts && VCost == ICost + JCost)
- return false;
- }
-
- CostSavings = ICost + JCost - VCost;
- }
- }
-
- return true;
- }
-
- // Figure out whether or not J uses I and update the users and write-set
- // structures associated with I. Specifically, Users represents the set of
- // instructions that depend on I. WriteSet represents the set
- // of memory locations that are dependent on I. If UpdateUsers is true,
- // and J uses I, then Users is updated to contain J and WriteSet is updated
- // to contain any memory locations to which J writes. The function returns
- // true if J uses I. By default, alias analysis is used to determine
- // whether J reads from memory that overlaps with a location in WriteSet.
- // If LoadMoveSetPairs is not null, then it is a previously-computed set of
- // pairs in which the first element is the memory-based user instruction and
- // the second is the instruction to be compared with I. So, if
- // LoadMoveSetPairs is provided, then the alias analysis is not used. This is
- // necessary because this function is called while instructions are being
- // moved during vectorization, and the alias-analysis results are not stable
- // during that process.
- bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users,
- AliasSetTracker &WriteSet, Instruction *I,
- Instruction *J, bool UpdateUsers,
- DenseSet<ValuePair> *LoadMoveSetPairs) {
- bool UsesI = false;
-
- // This instruction may already be marked as a user due, for example, to
- // being a member of a selected pair.
- if (Users.count(J))
- UsesI = true;
-
- if (!UsesI)
- for (User::op_iterator JU = J->op_begin(), JE = J->op_end();
- JU != JE; ++JU) {
- Value *V = *JU;
- if (I == V || Users.count(V)) {
- UsesI = true;
- break;
- }
- }
- if (!UsesI && J->mayReadFromMemory()) {
- if (LoadMoveSetPairs) {
- UsesI = LoadMoveSetPairs->count(ValuePair(J, I));
- } else {
- for (AliasSetTracker::iterator W = WriteSet.begin(),
- WE = WriteSet.end(); W != WE; ++W) {
- if (W->aliasesUnknownInst(J, *AA)) {
- UsesI = true;
- break;
- }
- }
- }
- }
-
- if (UsesI && UpdateUsers) {
- if (J->mayWriteToMemory()) WriteSet.add(J);
- Users.insert(J);
- }
-
- return UsesI;
- }
-
- // This function iterates over all instruction pairs in the provided
- // basic block and collects all candidate pairs for vectorization.
- bool BBVectorize::getCandidatePairs(BasicBlock &BB,
- BasicBlock::iterator &Start,
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts, bool NonPow2Len) {
- size_t TotalPairs = 0;
- BasicBlock::iterator E = BB.end();
- if (Start == E) return false;
-
- bool ShouldContinue = false, IAfterStart = false;
- for (BasicBlock::iterator I = Start++; I != E; ++I) {
- if (I == Start) IAfterStart = true;
-
- bool IsSimpleLoadStore;
- if (!isInstVectorizable(&*I, IsSimpleLoadStore))
- continue;
-
- // Look for an instruction with which to pair instruction *I...
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory())
- WriteSet.add(&*I);
-
- bool JAfterStart = IAfterStart;
- BasicBlock::iterator J = std::next(I);
- for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
- if (J == Start)
- JAfterStart = true;
-
- // Determine if J uses I; if so, J cannot be fused with I.
- bool UsesI = trackUsesOfI(Users, WriteSet, &*I, &*J, !Config.FastDep);
- if (Config.FastDep) {
- // Note: For this heuristic to be effective, independent operations
- // must tend to be intermixed. This is likely to be true for some
- // kinds of grouped loop unrolling (but not the generic LLVM pass),
- // but otherwise may require some kind of reordering pass.
-
- // When using fast dependency analysis,
- // stop searching after first use:
- if (UsesI) break;
- } else {
- if (UsesI) continue;
- }
-
- // J does not use I, and comes before the first use of I, so it can be
- // merged with I if the instructions are compatible.
- int CostSavings, FixedOrder;
- if (!areInstsCompatible(&*I, &*J, IsSimpleLoadStore, NonPow2Len,
- CostSavings, FixedOrder))
- continue;
-
- // J is a candidate for merging with I.
- if (PairableInsts.empty() ||
- PairableInsts[PairableInsts.size() - 1] != &*I) {
- PairableInsts.push_back(&*I);
- }
-
- CandidatePairs[&*I].push_back(&*J);
- ++TotalPairs;
- if (TTI)
- CandidatePairCostSavings.insert(
- ValuePairWithCost(ValuePair(&*I, &*J), CostSavings));
-
- if (FixedOrder == 1)
- FixedOrderPairs.insert(ValuePair(&*I, &*J));
- else if (FixedOrder == -1)
- FixedOrderPairs.insert(ValuePair(&*J, &*I));
-
- // The next call to this function must start after the last instruction
- // selected during this invocation.
- if (JAfterStart) {
- Start = std::next(J);
- IAfterStart = JAfterStart = false;
- }
-
- DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair "
- << *I << " <-> " << *J << " (cost savings: " <<
- CostSavings << ")\n");
-
- // If we have already found too many pairs, break here and this function
- // will be called again starting after the last instruction selected
- // during this invocation.
- if (PairableInsts.size() >= Config.MaxInsts ||
- TotalPairs >= Config.MaxPairs) {
- ShouldContinue = true;
- break;
- }
- }
-
- if (ShouldContinue)
- break;
- }
-
- DEBUG(dbgs() << "BBV: found " << PairableInsts.size()
- << " instructions with candidate pairs\n");
-
- return ShouldContinue;
- }
-
- // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that
- // it looks for pairs such that both members have an input which is an
- // output of PI or PJ.
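- // (Illustrative note: for P = <A, B>, if U1 uses A and U2 uses B and
- // <U1, U2> is itself a candidate pair, the connection is "direct"; if the
- // candidate pair is <U2, U1>, the connection is a "swap"; and if both
- // members of another pair use only A (or only B), the connection is a
- // "splat".)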
- void BBVectorize::computePairsConnectedTo(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- ValuePair P) {
- StoreInst *SI, *SJ;
-
- // For each possible pairing for this variable, look at the uses of
- // the first value...
- for (Value::user_iterator I = P.first->user_begin(),
- E = P.first->user_end();
- I != E; ++I) {
- User *UI = *I;
- if (isa<LoadInst>(UI)) {
- // A pair cannot be connected to a load because the load only takes one
- // operand (the address) and it is a scalar even after vectorization.
- continue;
- } else if ((SI = dyn_cast<StoreInst>(UI)) &&
- P.first == SI->getPointerOperand()) {
- // Similarly, a pair cannot be connected to a store through its
- // pointer operand.
- continue;
- }
-
- // For each use of the first variable, look for uses of the second
- // variable...
- for (User *UJ : P.second->users()) {
- if ((SJ = dyn_cast<StoreInst>(UJ)) &&
- P.second == SJ->getPointerOperand())
- continue;
-
- // Look for <I, J>:
- if (CandidatePairsSet.count(ValuePair(UI, UJ))) {
- VPPair VP(P, ValuePair(UI, UJ));
- ConnectedPairs[VP.first].push_back(VP.second);
- PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect));
- }
-
- // Look for <J, I>:
- if (CandidatePairsSet.count(ValuePair(UJ, UI))) {
- VPPair VP(P, ValuePair(UJ, UI));
- ConnectedPairs[VP.first].push_back(VP.second);
- PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap));
- }
- }
-
- if (Config.SplatBreaksChain) continue;
- // Look for cases where just the first value in the pair is used by
- // both members of another pair (splatting).
- for (Value::user_iterator J = P.first->user_begin(); J != E; ++J) {
- User *UJ = *J;
- if ((SJ = dyn_cast<StoreInst>(UJ)) &&
- P.first == SJ->getPointerOperand())
- continue;
-
- if (CandidatePairsSet.count(ValuePair(UI, UJ))) {
- VPPair VP(P, ValuePair(UI, UJ));
- ConnectedPairs[VP.first].push_back(VP.second);
- PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
- }
- }
- }
-
- if (Config.SplatBreaksChain) return;
- // Look for cases where just the second value in the pair is used by
- // both members of another pair (splatting).
- for (Value::user_iterator I = P.second->user_begin(),
- E = P.second->user_end();
- I != E; ++I) {
- User *UI = *I;
- if (isa<LoadInst>(UI))
- continue;
- else if ((SI = dyn_cast<StoreInst>(UI)) &&
- P.second == SI->getPointerOperand())
- continue;
-
- for (Value::user_iterator J = P.second->user_begin(); J != E; ++J) {
- User *UJ = *J;
- if ((SJ = dyn_cast<StoreInst>(UJ)) &&
- P.second == SJ->getPointerOperand())
- continue;
-
- if (CandidatePairsSet.count(ValuePair(UI, UJ))) {
- VPPair VP(P, ValuePair(UI, UJ));
- ConnectedPairs[VP.first].push_back(VP.second);
- PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
- }
- }
- }
- }
-
- // This function figures out which pairs are connected. Two pairs are
- // connected if some output of the first pair forms an input to both members
- // of the second pair.
- void BBVectorize::computeConnectedPairs(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes) {
- for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
- PE = PairableInsts.end(); PI != PE; ++PI) {
- DenseMap<Value *, std::vector<Value *> >::iterator PP =
- CandidatePairs.find(*PI);
- if (PP == CandidatePairs.end())
- continue;
-
- for (std::vector<Value *>::iterator P = PP->second.begin(),
- E = PP->second.end(); P != E; ++P)
- computePairsConnectedTo(CandidatePairs, CandidatePairsSet,
- PairableInsts, ConnectedPairs,
- PairConnectionTypes, ValuePair(*PI, *P));
- }
-
- DEBUG(size_t TotalPairs = 0;
- for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I =
- ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I)
- TotalPairs += I->second.size();
- dbgs() << "BBV: found " << TotalPairs
- << " pair connections.\n");
- }
-
- // This function builds a set of use tuples such that <A, B> is in the set
- // if B is in the use dag of A. If B is in the use dag of A, then B
- // depends on the output of A.
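- // (Illustrative note: entries are only recorded when both A and B are
- // members of some candidate pair, so PairableInstUsers describes the
- // transitive dependencies among pairable instructions within the block.)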
- void BBVectorize::buildDepMap(
- BasicBlock &BB,
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &PairableInstUsers) {
- DenseSet<Value *> IsInPair;
- for (DenseMap<Value *, std::vector<Value *> >::iterator C =
- CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) {
- IsInPair.insert(C->first);
- IsInPair.insert(C->second.begin(), C->second.end());
- }
-
- // Iterate through the basic block, recording all users of each
- // pairable instruction.
-
- BasicBlock::iterator E = BB.end(), EL =
- BasicBlock::iterator(cast<Instruction>(PairableInsts.back()));
- for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
- if (IsInPair.find(&*I) == IsInPair.end())
- continue;
-
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory())
- WriteSet.add(&*I);
-
- for (BasicBlock::iterator J = std::next(I); J != E; ++J) {
- (void)trackUsesOfI(Users, WriteSet, &*I, &*J);
-
- if (J == EL)
- break;
- }
-
- for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
- U != E; ++U) {
- if (IsInPair.find(*U) == IsInPair.end()) continue;
- PairableInstUsers.insert(ValuePair(&*I, *U));
- }
-
- if (I == EL)
- break;
- }
- }
-
- // Returns true if an input to pair P is an output of pair Q and also an
- // input of pair Q is an output of pair P. If this is the case, then these
- // two pairs cannot be simultaneously fused.
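- // (Illustrative note: for P = <a, b> and Q = <x, y>, if x uses a and b
- // uses y, then each pair consumes an output of the other, so fusing both
- // pairs would create a circular dependency.)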
- bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap,
- DenseSet<VPPair> *PairableInstUserPairSet) {
- // Two pairs are in conflict if they are mutual users of each other.
- bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) ||
- PairableInstUsers.count(ValuePair(P.first, Q.second)) ||
- PairableInstUsers.count(ValuePair(P.second, Q.first)) ||
- PairableInstUsers.count(ValuePair(P.second, Q.second));
- bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) ||
- PairableInstUsers.count(ValuePair(Q.first, P.second)) ||
- PairableInstUsers.count(ValuePair(Q.second, P.first)) ||
- PairableInstUsers.count(ValuePair(Q.second, P.second));
- if (PairableInstUserMap) {
- // FIXME: The expensive part of the cycle check is not so much the cycle
- // check itself but this edge insertion procedure. This needs some
- // profiling and probably a different data structure.
- if (PUsesQ) {
- if (PairableInstUserPairSet->insert(VPPair(Q, P)).second)
- (*PairableInstUserMap)[Q].push_back(P);
- }
- if (QUsesP) {
- if (PairableInstUserPairSet->insert(VPPair(P, Q)).second)
- (*PairableInstUserMap)[P].push_back(Q);
- }
- }
-
- return (QUsesP && PUsesQ);
- }
-
- // This function walks the use graph of current pairs to see if, starting
- // from P, the walk returns to P.
- bool BBVectorize::pairWillFormCycle(ValuePair P,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<ValuePair> &CurrentPairs) {
- DEBUG(if (DebugCycleCheck)
- dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> "
- << *P.second << "\n");
- // A lookup table of visited pairs is kept because the PairableInstUserMap
- // contains non-direct associations.
- DenseSet<ValuePair> Visited;
- SmallVector<ValuePair, 32> Q;
- // General depth-first post-order traversal:
- Q.push_back(P);
- do {
- ValuePair QTop = Q.pop_back_val();
- Visited.insert(QTop);
-
- DEBUG(if (DebugCycleCheck)
- dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> "
- << *QTop.second << "\n");
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
- PairableInstUserMap.find(QTop);
- if (QQ == PairableInstUserMap.end())
- continue;
-
- for (std::vector<ValuePair>::iterator C = QQ->second.begin(),
- CE = QQ->second.end(); C != CE; ++C) {
- if (*C == P) {
- DEBUG(dbgs()
- << "BBV: rejected to prevent non-trivial cycle formation: "
- << *QTop.first << " <-> " << *C->second << "\n");
- return true;
- }
-
- if (CurrentPairs.count(*C) && !Visited.count(*C))
- Q.push_back(*C);
- }
- } while (!Q.empty());
-
- return false;
- }
-
- // This function builds the initial dag of connected pairs with the
- // pair J at the root.
- void BBVectorize::buildInitialDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &DAG, ValuePair J) {
- // Each of these pairs is viewed as the root node of a DAG. The DAG
- // is then walked (depth-first). As this happens, we keep track of
- // the pairs that compose the DAG and the maximum depth of the DAG.
- SmallVector<ValuePairWithDepth, 32> Q;
- // General depth-first post-order traversal:
- Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
- do {
- ValuePairWithDepth QTop = Q.back();
-
- // Push each child onto the queue:
- bool MoreChildren = false;
- size_t MaxChildDepth = QTop.second;
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
- ConnectedPairs.find(QTop.first);
- if (QQ != ConnectedPairs.end())
- for (std::vector<ValuePair>::iterator k = QQ->second.begin(),
- ke = QQ->second.end(); k != ke; ++k) {
- // Make sure that this child pair is still a candidate:
- if (CandidatePairsSet.count(*k)) {
- DenseMap<ValuePair, size_t>::iterator C = DAG.find(*k);
- if (C == DAG.end()) {
- size_t d = getDepthFactor(k->first);
- Q.push_back(ValuePairWithDepth(*k, QTop.second+d));
- MoreChildren = true;
- } else {
- MaxChildDepth = std::max(MaxChildDepth, C->second);
- }
- }
- }
-
- if (!MoreChildren) {
- // Record the current pair as part of the DAG:
- DAG.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
- Q.pop_back();
- }
- } while (!Q.empty());
- }
-
- // Given some initial dag, prune it by removing conflicting pairs (pairs
- // that cannot be simultaneously chosen for vectorization).
- void BBVectorize::pruneDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<VPPair> &PairableInstUserPairSet,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &DAG,
- DenseSet<ValuePair> &PrunedDAG, ValuePair J,
- bool UseCycleCheck) {
- SmallVector<ValuePairWithDepth, 32> Q;
- // General depth-first post-order traversal:
- Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
- do {
- ValuePairWithDepth QTop = Q.pop_back_val();
- PrunedDAG.insert(QTop.first);
-
- // Visit each child, pruning as necessary...
- SmallVector<ValuePairWithDepth, 8> BestChildren;
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
- ConnectedPairs.find(QTop.first);
- if (QQ == ConnectedPairs.end())
- continue;
-
- for (std::vector<ValuePair>::iterator K = QQ->second.begin(),
- KE = QQ->second.end(); K != KE; ++K) {
- DenseMap<ValuePair, size_t>::iterator C = DAG.find(*K);
- if (C == DAG.end()) continue;
-
- // This child is in the DAG; now we need to make sure it is the
- // best of any conflicting children. There could be multiple
- // conflicting children, so first, determine if we're keeping
- // this child, then delete conflicting children as necessary.
-
- // It is also necessary to guard against pairing-induced
- // dependencies. Consider instructions a .. x .. y .. b
- // such that (a,b) are to be fused and (x,y) are to be fused
- // but a is an input to x and y is an input to b. This
- // means that y cannot be moved after b, but x must be moved
- // after b for (a,b) to be fused. In other words, after
- // fusing (a,b) we have y .. a/b .. x where y is an input
- // to a/b and x is a user of a/b: x and y can no longer
- // be legally fused. To prevent this condition, we must
- // make sure that a child pair added to the DAG is not
- // both an input and output of an already-selected pair.
-
- // Pairing-induced dependencies can also form from more complicated
- // cycles. The pair vs. pair conflicts are easy to check, and so
- // that is done explicitly for "fast rejection", and because for
- // child vs. child conflicts, we may prefer to keep the current
- // pair in preference to the already-selected child.
- DenseSet<ValuePair> CurrentPairs;
-
- bool CanAdd = true;
- for (SmallVectorImpl<ValuePairWithDepth>::iterator C2
- = BestChildren.begin(), E2 = BestChildren.end();
- C2 != E2; ++C2) {
- if (C2->first.first == C->first.first ||
- C2->first.first == C->first.second ||
- C2->first.second == C->first.first ||
- C2->first.second == C->first.second ||
- pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet
- : nullptr)) {
- if (C2->second >= C->second) {
- CanAdd = false;
- break;
- }
-
- CurrentPairs.insert(C2->first);
- }
- }
- if (!CanAdd) continue;
-
- // Even worse, this child could conflict with another node already
- // selected for the DAG. If that is the case, ignore this child.
- for (DenseSet<ValuePair>::iterator T = PrunedDAG.begin(),
- E2 = PrunedDAG.end(); T != E2; ++T) {
- if (T->first == C->first.first ||
- T->first == C->first.second ||
- T->second == C->first.first ||
- T->second == C->first.second ||
- pairsConflict(*T, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet
- : nullptr)) {
- CanAdd = false;
- break;
- }
-
- CurrentPairs.insert(*T);
- }
- if (!CanAdd) continue;
-
- // And check the queue too...
- for (SmallVectorImpl<ValuePairWithDepth>::iterator C2 = Q.begin(),
- E2 = Q.end(); C2 != E2; ++C2) {
- if (C2->first.first == C->first.first ||
- C2->first.first == C->first.second ||
- C2->first.second == C->first.first ||
- C2->first.second == C->first.second ||
- pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet
- : nullptr)) {
- CanAdd = false;
- break;
- }
-
- CurrentPairs.insert(C2->first);
- }
- if (!CanAdd) continue;
-
- // Last but not least, check for a conflict with any of the
- // already-chosen pairs.
- for (DenseMap<Value *, Value *>::iterator C2 =
- ChosenPairs.begin(), E2 = ChosenPairs.end();
- C2 != E2; ++C2) {
- if (pairsConflict(*C2, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet
- : nullptr)) {
- CanAdd = false;
- break;
- }
-
- CurrentPairs.insert(*C2);
- }
- if (!CanAdd) continue;
-
- // To check for non-trivial cycles formed by the addition of the
- // current pair, we've formed a list of all relevant pairs; now use a
- // graph walk to check for a cycle. We start from the current pair and
- // walk the use dag to see if we again reach the current pair. If we
- // do, then the current pair is rejected.
-
- // FIXME: It may be more efficient to use a topological-ordering
- // algorithm to improve the cycle check. This should be investigated.
- if (UseCycleCheck &&
- pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
- continue;
-
- // This child can be added, but we may have chosen it in preference
- // to an already-selected child. Check for this here, and if a
- // conflict is found, then remove the previously-selected child
- // before adding this one in its place.
- for (SmallVectorImpl<ValuePairWithDepth>::iterator C2
- = BestChildren.begin(); C2 != BestChildren.end();) {
- if (C2->first.first == C->first.first ||
- C2->first.first == C->first.second ||
- C2->first.second == C->first.first ||
- C2->first.second == C->first.second ||
- pairsConflict(C2->first, C->first, PairableInstUsers))
- C2 = BestChildren.erase(C2);
- else
- ++C2;
- }
-
- BestChildren.push_back(ValuePairWithDepth(C->first, C->second));
- }
-
- for (SmallVectorImpl<ValuePairWithDepth>::iterator C
- = BestChildren.begin(), E2 = BestChildren.end();
- C != E2; ++C) {
- size_t DepthF = getDepthFactor(C->first.first);
- Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
- }
- } while (!Q.empty());
- }
-
- // This function finds the best dag of mutually-compatible connected
- // pairs, given the choice of root pairs as an iterator range.
- void BBVectorize::findBestDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<VPPair> &PairableInstUserPairSet,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
- int &BestEffSize, Value *II, std::vector<Value *>&JJ,
- bool UseCycleCheck) {
- for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end();
- J != JE; ++J) {
- ValuePair IJ(II, *J);
- if (!CandidatePairsSet.count(IJ))
- continue;
-
- // Before going any further, make sure that this pair does not
- // conflict with any already-selected pairs (see comment below
- // near the DAG pruning for more details).
- DenseSet<ValuePair> ChosenPairSet;
- bool DoesConflict = false;
- for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
- E = ChosenPairs.end(); C != E; ++C) {
- if (pairsConflict(*C, IJ, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet : nullptr)) {
- DoesConflict = true;
- break;
- }
-
- ChosenPairSet.insert(*C);
- }
- if (DoesConflict) continue;
-
- if (UseCycleCheck &&
- pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet))
- continue;
-
- DenseMap<ValuePair, size_t> DAG;
- buildInitialDAGFor(CandidatePairs, CandidatePairsSet,
- PairableInsts, ConnectedPairs,
- PairableInstUsers, ChosenPairs, DAG, IJ);
-
- // Because we'll keep the child with the largest depth, the largest
- // depth is still the same in the unpruned DAG.
- size_t MaxDepth = DAG.lookup(IJ);
-
- DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {"
- << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
- MaxDepth << " and size " << DAG.size() << "\n");
-
- // At this point the DAG has been constructed, but it may contain
- // contradictory children (meaning that different children of
- // some dag node may be attempting to fuse the same instruction).
- // So now we walk the dag again and, in the case of a conflict,
- // keep only the child with the largest depth. To break a tie,
- // favor the first child.
-
- DenseSet<ValuePair> PrunedDAG;
- pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs,
- PairableInstUsers, PairableInstUserMap,
- PairableInstUserPairSet,
- ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck);
-
- int EffSize = 0;
- if (TTI) {
- DenseSet<Value *> PrunedDAGInstrs;
- for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
- E = PrunedDAG.end(); S != E; ++S) {
- PrunedDAGInstrs.insert(S->first);
- PrunedDAGInstrs.insert(S->second);
- }
-
- // The set of pairs that have already contributed to the total cost.
- DenseSet<ValuePair> IncomingPairs;
-
- // If the cost model were perfect, this might not be necessary; but we
- // need to make sure that we don't get stuck vectorizing our own
- // shuffle chains.
- bool HasNontrivialInsts = false;
-
- // The node weights represent the cost savings associated with
- // fusing the pair of instructions.
- for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
- E = PrunedDAG.end(); S != E; ++S) {
- if (!isa<ShuffleVectorInst>(S->first) &&
- !isa<InsertElementInst>(S->first) &&
- !isa<ExtractElementInst>(S->first))
- HasNontrivialInsts = true;
-
- bool FlipOrder = false;
-
- if (getDepthFactor(S->first)) {
- int ESContrib = CandidatePairCostSavings.find(*S)->second;
- DEBUG(if (DebugPairSelection) dbgs() << "\tweight {"
- << *S->first << " <-> " << *S->second << "} = " <<
- ESContrib << "\n");
- EffSize += ESContrib;
- }
-
- // The edge weights contribute in a negative sense: they represent
- // the cost of shuffles.
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator SS =
- ConnectedPairDeps.find(*S);
- if (SS != ConnectedPairDeps.end()) {
- unsigned NumDepsDirect = 0, NumDepsSwap = 0;
- for (std::vector<ValuePair>::iterator T = SS->second.begin(),
- TE = SS->second.end(); T != TE; ++T) {
- VPPair Q(*S, *T);
- if (!PrunedDAG.count(Q.second))
- continue;
- DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q.second, Q.first));
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- if (R->second == PairConnectionDirect)
- ++NumDepsDirect;
- else if (R->second == PairConnectionSwap)
- ++NumDepsSwap;
- }
-
- // If there are more swaps than direct connections, then
- // the pair order will be flipped during fusion. So the real
- // number of swaps is the smaller of the two counts.
- FlipOrder = !FixedOrderPairs.count(*S) &&
- ((NumDepsSwap > NumDepsDirect) ||
- FixedOrderPairs.count(ValuePair(S->second, S->first)));
-
- for (std::vector<ValuePair>::iterator T = SS->second.begin(),
- TE = SS->second.end(); T != TE; ++T) {
- VPPair Q(*S, *T);
- if (!PrunedDAG.count(Q.second))
- continue;
- DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q.second, Q.first));
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- Type *Ty1 = Q.second.first->getType(),
- *Ty2 = Q.second.second->getType();
- Type *VTy = getVecTypeForPair(Ty1, Ty2);
- if ((R->second == PairConnectionDirect && FlipOrder) ||
- (R->second == PairConnectionSwap && !FlipOrder) ||
- R->second == PairConnectionSplat) {
- int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- VTy, VTy);
-
- if (VTy->getVectorNumElements() == 2) {
- if (R->second == PairConnectionSplat)
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_Broadcast, VTy));
- else
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_Reverse, VTy));
- }
-
- DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
- *Q.second.first << " <-> " << *Q.second.second <<
- "} -> {" <<
- *S->first << " <-> " << *S->second << "} = " <<
- ESContrib << "\n");
- EffSize -= ESContrib;
- }
- }
- }
-
- // Compute the cost of outgoing edges. We assume that edges outgoing
- // to shuffles, inserts or extracts can be merged, and so contribute
- // no additional cost.
- if (!S->first->getType()->isVoidTy()) {
- Type *Ty1 = S->first->getType(),
- *Ty2 = S->second->getType();
- Type *VTy = getVecTypeForPair(Ty1, Ty2);
-
- bool NeedsExtraction = false;
- for (User *U : S->first->users()) {
- if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(U)) {
- // Shuffle can be folded if it has no other input
- if (isa<UndefValue>(SI->getOperand(1)))
- continue;
- }
- if (isa<ExtractElementInst>(U))
- continue;
- if (PrunedDAGInstrs.count(U))
- continue;
- NeedsExtraction = true;
- break;
- }
-
- if (NeedsExtraction) {
- int ESContrib;
- if (Ty1->isVectorTy()) {
- ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- Ty1, VTy);
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1));
- } else
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::ExtractElement, VTy, 0);
-
- DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
- *S->first << "} = " << ESContrib << "\n");
- EffSize -= ESContrib;
- }
-
- NeedsExtraction = false;
- for (User *U : S->second->users()) {
- if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(U)) {
- // Shuffle can be folded if it has no other input
- if (isa<UndefValue>(SI->getOperand(1)))
- continue;
- }
- if (isa<ExtractElementInst>(U))
- continue;
- if (PrunedDAGInstrs.count(U))
- continue;
- NeedsExtraction = true;
- break;
- }
-
- if (NeedsExtraction) {
- int ESContrib;
- if (Ty2->isVectorTy()) {
- ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- Ty2, VTy);
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_ExtractSubvector, VTy,
- Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2));
- } else
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::ExtractElement, VTy, 1);
- DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
- *S->second << "} = " << ESContrib << "\n");
- EffSize -= ESContrib;
- }
- }
-
- // Compute the cost of incoming edges.
- if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
- Instruction *S1 = cast<Instruction>(S->first),
- *S2 = cast<Instruction>(S->second);
- for (unsigned o = 0; o < S1->getNumOperands(); ++o) {
- Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o);
-
- // Combining constants into vector constants (or small vector
- // constants into larger ones) is assumed free.
- if (isa<Constant>(O1) && isa<Constant>(O2))
- continue;
-
- if (FlipOrder)
- std::swap(O1, O2);
-
- ValuePair VP = ValuePair(O1, O2);
- ValuePair VPR = ValuePair(O2, O1);
-
- // Internal edges are not handled here.
- if (PrunedDAG.count(VP) || PrunedDAG.count(VPR))
- continue;
-
- Type *Ty1 = O1->getType(),
- *Ty2 = O2->getType();
- Type *VTy = getVecTypeForPair(Ty1, Ty2);
-
- // Combining vector operations of the same type is also assumed
- // folded with other operations.
- if (Ty1 == Ty2) {
- // If both are insert elements, then both can be widened.
- InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1),
- *IEO2 = dyn_cast<InsertElementInst>(O2);
- if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
- continue;
- // If both are extract elements, and both have the same input
- // type, then they can be replaced with a shuffle
- ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1),
- *EIO2 = dyn_cast<ExtractElementInst>(O2);
- if (EIO1 && EIO2 &&
- EIO1->getOperand(0)->getType() ==
- EIO2->getOperand(0)->getType())
- continue;
- // If both are shuffles with equal operand types and only two
- // unique operands, then they can be replaced with a single
- // shuffle.
- ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1),
- *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
- if (SIO1 && SIO2 &&
- SIO1->getOperand(0)->getType() ==
- SIO2->getOperand(0)->getType()) {
- SmallSet<Value *, 4> SIOps;
- SIOps.insert(SIO1->getOperand(0));
- SIOps.insert(SIO1->getOperand(1));
- SIOps.insert(SIO2->getOperand(0));
- SIOps.insert(SIO2->getOperand(1));
- if (SIOps.size() <= 2)
- continue;
- }
- }
-
- int ESContrib;
- // This pair has already been formed.
- if (IncomingPairs.count(VP)) {
- continue;
- } else if (IncomingPairs.count(VPR)) {
- ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- VTy, VTy);
-
- if (VTy->getVectorNumElements() == 2)
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_Reverse, VTy));
- } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) {
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::InsertElement, VTy, 0);
- ESContrib += (int) TTI->getVectorInstrCost(
- Instruction::InsertElement, VTy, 1);
- } else if (!Ty1->isVectorTy()) {
- // O1 needs to be inserted into a vector of size O2, and then
- // both need to be shuffled together.
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::InsertElement, Ty2, 0);
- ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
- VTy, Ty2);
- } else if (!Ty2->isVectorTy()) {
- // O2 needs to be inserted into a vector of size O1, and then
- // both need to be shuffled together.
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::InsertElement, Ty1, 0);
- ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
- VTy, Ty1);
- } else {
- Type *TyBig = Ty1, *TySmall = Ty2;
- if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements())
- std::swap(TyBig, TySmall);
-
- ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- VTy, TyBig);
- if (TyBig != TySmall)
- ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
- TyBig, TySmall);
- }
-
- DEBUG(if (DebugPairSelection) dbgs() << "\tcost {"
- << *O1 << " <-> " << *O2 << "} = " <<
- ESContrib << "\n");
- EffSize -= ESContrib;
- IncomingPairs.insert(VP);
- }
- }
- }
-
- if (!HasNontrivialInsts) {
- DEBUG(if (DebugPairSelection) dbgs() <<
- "\tNo non-trivial instructions in DAG;"
- " override to zero effective size\n");
- EffSize = 0;
- }
- } else {
- for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
- E = PrunedDAG.end(); S != E; ++S)
- EffSize += (int) getDepthFactor(S->first);
- }
-
- DEBUG(if (DebugPairSelection)
- dbgs() << "BBV: found pruned DAG for pair {"
- << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
- MaxDepth << " and size " << PrunedDAG.size() <<
- " (effective size: " << EffSize << ")\n");
- if (((TTI && !UseChainDepthWithTI) ||
- MaxDepth >= Config.ReqChainDepth) &&
- EffSize > 0 && EffSize > BestEffSize) {
- BestMaxDepth = MaxDepth;
- BestEffSize = EffSize;
- BestDAG = PrunedDAG;
- }
- }
- }
-
- // Given the list of candidate pairs, this function selects those
- // that will be fused into vector instructions.
- void BBVectorize::choosePairs(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *>& ChosenPairs) {
- bool UseCycleCheck =
- CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck;
-
- DenseMap<Value *, std::vector<Value *> > CandidatePairs2;
- for (DenseSet<ValuePair>::iterator I = CandidatePairsSet.begin(),
- E = CandidatePairsSet.end(); I != E; ++I) {
- std::vector<Value *> &JJ = CandidatePairs2[I->second];
- if (JJ.empty()) JJ.reserve(32);
- JJ.push_back(I->first);
- }
-
- DenseMap<ValuePair, std::vector<ValuePair> > PairableInstUserMap;
- DenseSet<VPPair> PairableInstUserPairSet;
- for (std::vector<Value *>::iterator I = PairableInsts.begin(),
- E = PairableInsts.end(); I != E; ++I) {
- // The number of possible pairings for this variable:
- size_t NumChoices = CandidatePairs.lookup(*I).size();
- if (!NumChoices) continue;
-
- std::vector<Value *> &JJ = CandidatePairs[*I];
-
- // The best pair to choose and its dag:
- size_t BestMaxDepth = 0;
- int BestEffSize = 0;
- DenseSet<ValuePair> BestDAG;
- findBestDAGFor(CandidatePairs, CandidatePairsSet,
- CandidatePairCostSavings,
- PairableInsts, FixedOrderPairs, PairConnectionTypes,
- ConnectedPairs, ConnectedPairDeps,
- PairableInstUsers, PairableInstUserMap,
- PairableInstUserPairSet, ChosenPairs,
- BestDAG, BestMaxDepth, BestEffSize, *I, JJ,
- UseCycleCheck);
-
- if (BestDAG.empty())
- continue;
-
- // A dag has been chosen (or not) at this point. If no dag was
- // chosen, then this instruction, I, cannot be paired (and is no longer
- // considered).
-
- DEBUG(dbgs() << "BBV: selected pairs in the best DAG for: "
- << *cast<Instruction>(*I) << "\n");
-
- for (DenseSet<ValuePair>::iterator S = BestDAG.begin(),
- SE2 = BestDAG.end(); S != SE2; ++S) {
- // Insert the members of this dag into the list of chosen pairs.
- ChosenPairs.insert(ValuePair(S->first, S->second));
- DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " <<
- *S->second << "\n");
-
- // Remove all candidate pairs that have values in the chosen dag.
- std::vector<Value *> &KK = CandidatePairs[S->first];
- for (std::vector<Value *>::iterator K = KK.begin(), KE = KK.end();
- K != KE; ++K) {
- if (*K == S->second)
- continue;
-
- CandidatePairsSet.erase(ValuePair(S->first, *K));
- }
-
- std::vector<Value *> &LL = CandidatePairs2[S->second];
- for (std::vector<Value *>::iterator L = LL.begin(), LE = LL.end();
- L != LE; ++L) {
- if (*L == S->first)
- continue;
-
- CandidatePairsSet.erase(ValuePair(*L, S->second));
- }
-
- std::vector<Value *> &MM = CandidatePairs[S->second];
- for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end();
- M != ME; ++M) {
- assert(*M != S->first && "Flipped pair in candidate list?");
- CandidatePairsSet.erase(ValuePair(S->second, *M));
- }
-
- std::vector<Value *> &NN = CandidatePairs2[S->first];
- for (std::vector<Value *>::iterator N = NN.begin(), NE = NN.end();
- N != NE; ++N) {
- assert(*N != S->second && "Flipped pair in candidate list?");
- CandidatePairsSet.erase(ValuePair(*N, S->first));
- }
- }
- }
-
- DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n");
- }
-
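- // Builds the name for a replacement value derived from instruction I. For
- // illustration: if I is named %a, then (IsInput=true, o=0) yields "a.v.i0",
- // (IsInput=true, o=0, n=2) yields "a.v.i0.2", and (IsInput=false, o=1)
- // yields "a.v.r1"; unnamed instructions yield an empty name.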
- std::string getReplacementName(Instruction *I, bool IsInput, unsigned o,
- unsigned n = 0) {
- if (!I->hasName())
- return "";
-
- return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) +
- (n > 0 ? "." + utostr(n) : "")).str();
- }
-
- // Returns the value that is to be used as the pointer input to the vector
- // instruction that fuses I with J.
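- // (Illustrative note: when fusing two adjacent scalar loads, e.g. i32 loads
- // from %p and %p+1, the lower-address pointer %p is bitcast to <2 x i32>*,
- // and that cast becomes the pointer operand of the fused load.)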
- Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
- Instruction *I, Instruction *J, unsigned o) {
- Value *IPtr, *JPtr;
- unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
- int64_t OffsetInElmts;
-
- // Note: the analysis might fail here; that is why the pair order has
- // been precomputed (OffsetInElmts must be unused here).
- (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
- IAddressSpace, JAddressSpace,
- OffsetInElmts, false);
-
- // The pointer value is taken to be the one with the lowest offset.
- Value *VPtr = IPtr;
-
- Type *ArgTypeI = IPtr->getType()->getPointerElementType();
- Type *ArgTypeJ = JPtr->getType()->getPointerElementType();
- Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- Type *VArgPtrType
- = PointerType::get(VArgType,
- IPtr->getType()->getPointerAddressSpace());
- return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
- /* insert before */ I);
- }
-
- void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J,
- unsigned MaskOffset, unsigned NumInElem,
- unsigned NumInElem1, unsigned IdxOffset,
- std::vector<Constant*> &Mask) {
- unsigned NumElem1 = J->getType()->getVectorNumElements();
- for (unsigned v = 0; v < NumElem1; ++v) {
- int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
- if (m < 0) {
- Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context));
- } else {
- unsigned mm = m + (int) IdxOffset;
- if (m >= (int) NumInElem1)
- mm += (int) NumInElem;
-
- Mask[v+MaskOffset] =
- ConstantInt::get(Type::getInt32Ty(Context), mm);
- }
- }
- }
-
- // Returns the value that is to be used as the vector-shuffle mask to the
- // vector instruction that fuses I with J.
- Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context,
- Instruction *I, Instruction *J) {
- // This is the shuffle mask. We need to append the second
- // mask to the first, and the numbers need to be adjusted.
-
- Type *ArgTypeI = I->getType();
- Type *ArgTypeJ = J->getType();
- Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
-
- unsigned NumElemI = ArgTypeI->getVectorNumElements();
-
- // Get the total number of elements in the fused vector type.
- // By definition, this must equal the number of elements in
- // the final mask.
- unsigned NumElem = VArgType->getVectorNumElements();
- std::vector<Constant*> Mask(NumElem);
-
- Type *OpTypeI = I->getOperand(0)->getType();
- unsigned NumInElemI = OpTypeI->getVectorNumElements();
- Type *OpTypeJ = J->getOperand(0)->getType();
- unsigned NumInElemJ = OpTypeJ->getVectorNumElements();
-
- // The fused vector will be:
- // -----------------------------------------------------
- // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ |
- // -----------------------------------------------------
- // from which we'll extract NumElem total elements (where the first NumElemI
- // of them come from the mask in I and the remainder come from the mask
- // in J).
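- // (Worked example, assuming NumInElemI == NumInElemJ == 2: the fused
- // operands are laid out as [I.op0 | J.op0 | I.op1 | J.op1], occupying
- // indices 0-1, 2-3, 4-5 and 6-7. If I's mask is <0, 3> and J's mask is
- // <2, 1>, the combined mask becomes <0, 5, 6, 3>.)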
-
- // For the mask from the first pair...
- fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI,
- 0, Mask);
-
- // For the mask from the second pair...
- fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ,
- NumInElemI, Mask);
-
- return ConstantVector::get(Mask);
- }
-
- bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, Value *&LOp,
- unsigned numElemL,
- Type *ArgTypeL, Type *ArgTypeH,
- bool IBeforeJ, unsigned IdxOff) {
- bool ExpandedIEChain = false;
- if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
- // If we have a pure insertelement chain, then this can be rewritten
- // into a chain that directly builds the larger type.
- if (isPureIEChain(LIE)) {
- SmallVector<Value *, 8> VectElemts(numElemL,
- UndefValue::get(ArgTypeL->getScalarType()));
- InsertElementInst *LIENext = LIE;
- do {
- unsigned Idx =
- cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue();
- VectElemts[Idx] = LIENext->getOperand(1);
- } while ((LIENext =
- dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
-
- LIENext = nullptr;
- Value *LIEPrev = UndefValue::get(ArgTypeH);
- for (unsigned i = 0; i < numElemL; ++i) {
- if (isa<UndefValue>(VectElemts[i])) continue;
- LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i],
- ConstantInt::get(Type::getInt32Ty(Context),
- i + IdxOff),
- getReplacementName(IBeforeJ ? I : J,
- true, o, i+1));
- LIENext->insertBefore(IBeforeJ ? J : I);
- LIEPrev = LIENext;
- }
-
- LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH);
- ExpandedIEChain = true;
- }
- }
-
- return ExpandedIEChain;
- }
-
- static unsigned getNumScalarElements(Type *Ty) {
- if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
- return VecTy->getNumElements();
- return 1;
- }
-
- // Returns the value to be used as the specified operand of the vector
- // instruction that fuses I with J.
- Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, bool IBeforeJ) {
- Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
- Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
-
- // Compute the fused vector type for this operand
- Type *ArgTypeI = I->getOperand(o)->getType();
- Type *ArgTypeJ = J->getOperand(o)->getType();
- VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
-
- Instruction *L = I, *H = J;
- Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
-
- unsigned numElemL = getNumScalarElements(ArgTypeL);
- unsigned numElemH = getNumScalarElements(ArgTypeH);
-
- Value *LOp = L->getOperand(o);
- Value *HOp = H->getOperand(o);
- unsigned numElem = VArgType->getNumElements();
-
- // First, we check if we can reuse the "original" vector outputs (if these
- // exist). We might need a shuffle.
- ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp);
- ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp);
- ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp);
- ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp);
-
- // FIXME: If we're fusing shuffle instructions, then we can't apply this
- // optimization. The input vectors to the shuffle might be a different
- // length from the shuffle outputs. Unfortunately, the replacement
- // shuffle mask has already been formed, and the mask entries are sensitive
- // to the sizes of the inputs.
- bool IsSizeChangeShuffle =
- isa<ShuffleVectorInst>(L) &&
- (LOp->getType() != L->getType() || HOp->getType() != H->getType());
-
- if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
- // We can have at most two unique vector inputs.
- bool CanUseInputs = true;
- Value *I1, *I2 = nullptr;
- if (LEE) {
- I1 = LEE->getOperand(0);
- } else {
- I1 = LSV->getOperand(0);
- I2 = LSV->getOperand(1);
- if (I2 == I1 || isa<UndefValue>(I2))
- I2 = nullptr;
- }
-
- if (HEE) {
- Value *I3 = HEE->getOperand(0);
- if (!I2 && I3 != I1)
- I2 = I3;
- else if (I3 != I1 && I3 != I2)
- CanUseInputs = false;
- } else {
- Value *I3 = HSV->getOperand(0);
- if (!I2 && I3 != I1)
- I2 = I3;
- else if (I3 != I1 && I3 != I2)
- CanUseInputs = false;
-
- if (CanUseInputs) {
- Value *I4 = HSV->getOperand(1);
- if (!isa<UndefValue>(I4)) {
- if (!I2 && I4 != I1)
- I2 = I4;
- else if (I4 != I1 && I4 != I2)
- CanUseInputs = false;
- }
- }
- }
-
- if (CanUseInputs) {
- unsigned LOpElem =
- cast<Instruction>(LOp)->getOperand(0)->getType()
- ->getVectorNumElements();
-
- unsigned HOpElem =
- cast<Instruction>(HOp)->getOperand(0)->getType()
- ->getVectorNumElements();
-
- // We have one or two input vectors. We need to map each index of the
- // operands to the index of the original vector.
- SmallVector<std::pair<int, int>, 8> II(numElem);
- for (unsigned i = 0; i < numElemL; ++i) {
- int Idx, INum;
- if (LEE) {
- Idx =
- cast<ConstantInt>(LEE->getOperand(1))->getSExtValue();
- INum = LEE->getOperand(0) == I1 ? 0 : 1;
- } else {
- Idx = LSV->getMaskValue(i);
- if (Idx < (int) LOpElem) {
- INum = LSV->getOperand(0) == I1 ? 0 : 1;
- } else {
- Idx -= LOpElem;
- INum = LSV->getOperand(1) == I1 ? 0 : 1;
- }
- }
-
- II[i] = std::pair<int, int>(Idx, INum);
- }
- for (unsigned i = 0; i < numElemH; ++i) {
- int Idx, INum;
- if (HEE) {
- Idx =
- cast<ConstantInt>(HEE->getOperand(1))->getSExtValue();
- INum = HEE->getOperand(0) == I1 ? 0 : 1;
- } else {
- Idx = HSV->getMaskValue(i);
- if (Idx < (int) HOpElem) {
- INum = HSV->getOperand(0) == I1 ? 0 : 1;
- } else {
- Idx -= HOpElem;
- INum = HSV->getOperand(1) == I1 ? 0 : 1;
- }
- }
-
- II[i + numElemL] = std::pair<int, int>(Idx, INum);
- }
-
- // We now have an array which tells us from which index of which
- // input vector each element of the operand comes.
- VectorType *I1T = cast<VectorType>(I1->getType());
- unsigned I1Elem = I1T->getNumElements();
-
- if (!I2) {
- // In this case there is only one underlying vector input. Check for
- // the trivial case where we can use the input directly.
- if (I1Elem == numElem) {
- bool ElemInOrder = true;
- for (unsigned i = 0; i < numElem; ++i) {
- if (II[i].first != (int) i && II[i].first != -1) {
- ElemInOrder = false;
- break;
- }
- }
-
- if (ElemInOrder)
- return I1;
- }
-
- // A shuffle is needed.
- std::vector<Constant *> Mask(numElem);
- for (unsigned i = 0; i < numElem; ++i) {
- int Idx = II[i].first;
- if (Idx == -1)
- Mask[i] = UndefValue::get(Type::getInt32Ty(Context));
- else
- Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
- }
-
- Instruction *S =
- new ShuffleVectorInst(I1, UndefValue::get(I1T),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o));
- S->insertBefore(IBeforeJ ? J : I);
- return S;
- }
-
- VectorType *I2T = cast<VectorType>(I2->getType());
- unsigned I2Elem = I2T->getNumElements();
-
- // This input comes from two distinct vectors. The first step is to
- // make sure that both vectors are the same length. If not, the
- // smaller one will need to grow before they can be shuffled together.
- if (I1Elem < I2Elem) {
- std::vector<Constant *> Mask(I2Elem);
- unsigned v = 0;
- for (; v < I1Elem; ++v)
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- for (; v < I2Elem; ++v)
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
- Instruction *NewI1 =
- new ShuffleVectorInst(I1, UndefValue::get(I1T),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- NewI1->insertBefore(IBeforeJ ? J : I);
- I1 = NewI1;
- I1Elem = I2Elem;
- } else if (I1Elem > I2Elem) {
- std::vector<Constant *> Mask(I1Elem);
- unsigned v = 0;
- for (; v < I2Elem; ++v)
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- for (; v < I1Elem; ++v)
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
- Instruction *NewI2 =
- new ShuffleVectorInst(I2, UndefValue::get(I2T),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- NewI2->insertBefore(IBeforeJ ? J : I);
- I2 = NewI2;
- }
-
- // Now that both I1 and I2 are the same length we can shuffle them
- // together (and use the result).
- std::vector<Constant *> Mask(numElem);
- for (unsigned v = 0; v < numElem; ++v) {
- if (II[v].first == -1) {
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
- } else {
- int Idx = II[v].first + II[v].second * I1Elem;
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
- }
- }
-
- Instruction *NewOp =
- new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J, true, o));
- NewOp->insertBefore(IBeforeJ ? J : I);
- return NewOp;
- }
- }
-
- Type *ArgType = ArgTypeL;
- if (numElemL < numElemH) {
- if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
- ArgTypeL, VArgType, IBeforeJ, 1)) {
- // This is another short-circuit case: we're combining a scalar into
- // a vector that is formed by an IE chain. We've just expanded the IE
- // chain; now insert the scalar and we're done.
-
- Instruction *S = InsertElementInst::Create(HOp, LOp, CV0,
- getReplacementName(IBeforeJ ? I : J, true, o));
- S->insertBefore(IBeforeJ ? J : I);
- return S;
- } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
- ArgTypeH, IBeforeJ)) {
- // The two vector inputs to the shuffle must be the same length,
- // so extend the smaller vector to be the same length as the larger one.
- Instruction *NLOp;
- if (numElemL > 1) {
-
- std::vector<Constant *> Mask(numElemH);
- unsigned v = 0;
- for (; v < numElemL; ++v)
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- for (; v < numElemH; ++v)
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
- NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- } else {
- NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0,
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- }
-
- NLOp->insertBefore(IBeforeJ ? J : I);
- LOp = NLOp;
- }
-
- ArgType = ArgTypeH;
- } else if (numElemL > numElemH) {
- if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
- ArgTypeH, VArgType, IBeforeJ)) {
- Instruction *S =
- InsertElementInst::Create(LOp, HOp,
- ConstantInt::get(Type::getInt32Ty(Context),
- numElemL),
- getReplacementName(IBeforeJ ? I : J,
- true, o));
- S->insertBefore(IBeforeJ ? J : I);
- return S;
- } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
- ArgTypeL, IBeforeJ)) {
- Instruction *NHOp;
- if (numElemH > 1) {
- std::vector<Constant *> Mask(numElemL);
- unsigned v = 0;
- for (; v < numElemH; ++v)
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- for (; v < numElemL; ++v)
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
- NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- } else {
- NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0,
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- }
-
- NHOp->insertBefore(IBeforeJ ? J : I);
- HOp = NHOp;
- }
- }
-
- if (ArgType->isVectorTy()) {
- unsigned numElem = VArgType->getVectorNumElements();
- std::vector<Constant*> Mask(numElem);
- for (unsigned v = 0; v < numElem; ++v) {
- unsigned Idx = v;
- // If the low vector was expanded, we need to skip the extra
- // undefined entries.
- if (v >= numElemL && numElemH > numElemL)
- Idx += (numElemH - numElemL);
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
- }
-
- Instruction *BV = new ShuffleVectorInst(LOp, HOp,
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J, true, o));
- BV->insertBefore(IBeforeJ ? J : I);
- return BV;
- }
-
- Instruction *BV1 = InsertElementInst::Create(
- UndefValue::get(VArgType), LOp, CV0,
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- BV1->insertBefore(IBeforeJ ? J : I);
- Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1,
- getReplacementName(IBeforeJ ? I : J,
- true, o, 2));
- BV2->insertBefore(IBeforeJ ? J : I);
- return BV2;
- }
-
- // This function creates an array of values that will be used as the inputs
- // to the vector instruction that fuses I with J.
- void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
- Instruction *I, Instruction *J,
- SmallVectorImpl<Value *> &ReplacedOperands,
- bool IBeforeJ) {
- unsigned NumOperands = I->getNumOperands();
-
- for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
- // Iterate backward so that we look at the store pointer
- // first and know whether or not we need to flip the inputs.
-
- if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
- // This is the pointer for a load/store instruction.
- ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o);
- continue;
- } else if (isa<CallInst>(I)) {
- Function *F = cast<CallInst>(I)->getCalledFunction();
- Intrinsic::ID IID = F->getIntrinsicID();
- if (o == NumOperands-1) {
- BasicBlock &BB = *I->getParent();
-
- Module *M = BB.getParent()->getParent();
- Type *ArgTypeI = I->getType();
- Type *ArgTypeJ = J->getType();
- Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
-
- ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
- continue;
- } else if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
- IID == Intrinsic::cttz) && o == 1) {
- // The second argument of powi/ctlz/cttz is a single integer/constant
- // and we've already checked that both arguments are equal.
- // As a result, we just keep I's second argument.
- ReplacedOperands[o] = I->getOperand(o);
- continue;
- }
- } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) {
- ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J);
- continue;
- }
-
- ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ);
- }
- }
-
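
The backward operand walk described in the comment above (visit the store pointer before the stored value) can be seen in isolation in this small sketch; it is illustrative only and does not use the LLVM API:

#include <cstdio>

int main() {
  const unsigned NumOperands = 2; // a store: operand 0 = stored value, operand 1 = pointer
  for (unsigned p = 0, o = NumOperands - 1; p < NumOperands; ++p, --o)
    std::printf("visit operand %u (%s)\n", o,
                o == 1 ? "store pointer" : "stored value");
  return 0;
}
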
- // This function creates two values that represent the outputs of the
- // original I and J instructions. These are generally vector shuffles
- // or extracts. In many cases, these will end up being unused and, thus,
- // eliminated by later passes.
- void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
- Instruction *J, Instruction *K,
- Instruction *&InsertionPt,
- Instruction *&K1, Instruction *&K2) {
- if (isa<StoreInst>(I))
- return;
-
- Type *IType = I->getType();
- Type *JType = J->getType();
-
- VectorType *VType = getVecTypeForPair(IType, JType);
- unsigned numElem = VType->getNumElements();
-
- unsigned numElemI = getNumScalarElements(IType);
- unsigned numElemJ = getNumScalarElements(JType);
-
- if (IType->isVectorTy()) {
- std::vector<Constant *> Mask1(numElemI), Mask2(numElemI);
- for (unsigned v = 0; v < numElemI; ++v) {
- Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ + v);
- }
-
- K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get(Mask1),
- getReplacementName(K, false, 1));
- } else {
- Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
- K1 = ExtractElementInst::Create(K, CV0, getReplacementName(K, false, 1));
- }
-
- if (JType->isVectorTy()) {
- std::vector<Constant *> Mask1(numElemJ), Mask2(numElemJ);
- for (unsigned v = 0; v < numElemJ; ++v) {
- Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI + v);
- }
-
- K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get(Mask2),
- getReplacementName(K, false, 2));
- } else {
- Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem - 1);
- K2 = ExtractElementInst::Create(K, CV1, getReplacementName(K, false, 2));
- }
-
- K1->insertAfter(K);
- K2->insertAfter(K1);
- InsertionPt = K2;
- }
-
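
To make the output splitting above concrete: K holds I's lanes followed by J's lanes, so K1 and K2 are recovered with two simple masks. A standalone sketch of just the mask arithmetic, illustrative only and not LLVM code:

#include <cstdio>
#include <vector>

int main() {
  const unsigned numElemI = 2, numElemJ = 2;
  std::vector<unsigned> Mask1(numElemI), Mask2(numElemJ);
  for (unsigned v = 0; v < numElemI; ++v)
    Mask1[v] = v;            // K1: the low numElemI lanes of K (I's result)
  for (unsigned v = 0; v < numElemJ; ++v)
    Mask2[v] = numElemI + v; // K2: the next numElemJ lanes of K (J's result)
  for (unsigned M : Mask1) std::printf("%u ", M);
  std::printf("| ");
  for (unsigned M : Mask2) std::printf("%u ", M);
  std::printf("\n"); // prints: 0 1 | 2 3
  return 0;
}
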
- // Check whether all uses of the instruction I (including pairing-induced uses) can be moved after J.
- bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *I, Instruction *J) {
- // Skip to the first instruction past I.
- BasicBlock::iterator L = std::next(BasicBlock::iterator(I));
-
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
-
- for (; cast<Instruction>(L) != J; ++L)
- (void)trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs);
-
- assert(cast<Instruction>(L) == J &&
- "Tracking has not proceeded far enough to check for dependencies");
- // If J is now in the use set of I, then trackUsesOfI will return true
- // and we have a dependency cycle (and the fusing operation must abort).
- return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs);
- }
-
- // Move all uses of the instruction I (including pairing-induced uses) after J.
- void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *&InsertionPt,
- Instruction *I, Instruction *J) {
- // Skip to the first instruction past I.
- BasicBlock::iterator L = std::next(BasicBlock::iterator(I));
-
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
-
- for (; cast<Instruction>(L) != J;) {
- if (trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs)) {
- // Move this instruction
- Instruction *InstToMove = &*L++;
-
- DEBUG(dbgs() << "BBV: moving: " << *InstToMove <<
- " to after " << *InsertionPt << "\n");
- InstToMove->removeFromParent();
- InstToMove->insertAfter(InsertionPt);
- InsertionPt = InstToMove;
- } else {
- ++L;
- }
- }
- }
-
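
The two routines above share one idea: walk the instructions between I and J, grow a use set seeded with I, and either abort (if J lands in the set) or move the collected users past the fused instruction. A toy sketch of that walk over plain integers, ignoring the memory and alias tracking the real code does through AliasSetTracker:

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Block: 0 is I, 1 uses 0, 2 is independent, 3 is J and uses 2.
  std::vector<std::vector<int>> Operands = {{}, {0}, {}, {2}};
  int I = 0, J = 3;
  std::set<int> Users = {I};
  std::vector<int> ToMove;
  for (int L = I + 1; L < J; ++L) {
    bool UsesI = false;
    for (int Op : Operands[L])
      if (Users.count(Op))
        UsesI = true;
    if (UsesI) {
      Users.insert(L);
      ToMove.push_back(L); // must be moved after the fused instruction
    }
  }
  // Fusion must abort if J itself depends on the use set of I.
  bool Cycle = false;
  for (int Op : Operands[J])
    if (Users.count(Op))
      Cycle = true;
  std::printf("moved %zu instruction(s), cycle=%d\n", ToMove.size(), Cycle);
  return 0;
}
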
- // Collect all load instructions that are in the move set of a given first
- // pair member. These loads depend on the first instruction, I, and so need
- // to be moved after J (the second instruction) when the pair is fused.
- void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *I) {
- // Skip to the first instruction past I.
- BasicBlock::iterator L = std::next(BasicBlock::iterator(I));
-
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
-
- // Note: We cannot end the loop when we reach J because J could be moved
- // farther down the use chain by another instruction pairing. Also, J
- // could be before I if this is an inverted input.
- for (BasicBlock::iterator E = BB.end(); L != E; ++L) {
- if (trackUsesOfI(Users, WriteSet, I, &*L)) {
- if (L->mayReadFromMemory()) {
- LoadMoveSet[&*L].push_back(I);
- LoadMoveSetPairs.insert(ValuePair(&*L, I));
- }
- }
- }
- }
-
- // In cases where both loads/stores and the computation of their pointers
- // are chosen for vectorization, we can end up in a situation where the
- // aliasing analysis starts returning different query results as the
- // process of fusing instruction pairs continues. Because the algorithm
- // relies on finding the same use dags here as were found earlier, we'll
- // need to precompute the necessary aliasing information here and then
- // manually update it during the fusion process.
- void BBVectorize::collectLoadMoveSet(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
- DenseSet<ValuePair> &LoadMoveSetPairs) {
- for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
- PIE = PairableInsts.end(); PI != PIE; ++PI) {
- DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
- if (P == ChosenPairs.end()) continue;
-
- Instruction *I = cast<Instruction>(P->first);
- collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet,
- LoadMoveSetPairs, I);
- }
- }
-
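
Since alias queries may change as pairs are fused, the maps built above are precomputed once and then maintained by hand. Their shape, reduced to std containers purely for illustration (the real code uses DenseMap/DenseSet over llvm::Instruction):

#include <cstdio>
#include <map>
#include <set>
#include <utility>
#include <vector>

struct Inst {};                                    // stand-in for llvm::Instruction
using ValuePair = std::pair<Inst *, Inst *>;

std::map<Inst *, std::vector<Inst *>> LoadMoveSet; // load -> pair members it depends on
std::set<ValuePair> LoadMoveSetPairs;              // same data, keyed for quick pair lookup

void recordDependentLoad(Inst *Load, Inst *I) {
  LoadMoveSet[Load].push_back(I);
  LoadMoveSetPairs.insert({Load, I});
}

int main() {
  Inst I, Load;
  recordDependentLoad(&Load, &I);
  std::printf("pairs recorded: %zu\n", LoadMoveSetPairs.size());
  return 0;
}
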
- // This function fuses the chosen instruction pairs into vector instructions,
- // taking care to preserve any needed scalar outputs, and then it reorders the
- // remaining instructions as needed (users of the first member of the pair
- // need to be moved to after the location of the second member of the pair
- // because the vector instruction is inserted in the location of the pair's
- // second member).
- void BBVectorize::fuseChosenPairs(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) {
- LLVMContext& Context = BB.getContext();
-
- // During the vectorization process, the order of the pairs to be fused
- // could be flipped. So we'll add each pair, flipped, into the ChosenPairs
- // list. After a pair is fused, the flipped pair is removed from the list.
- DenseSet<ValuePair> FlippedPairs;
- for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(),
- E = ChosenPairs.end(); P != E; ++P)
- FlippedPairs.insert(ValuePair(P->second, P->first));
- for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(),
- E = FlippedPairs.end(); P != E; ++P)
- ChosenPairs.insert(*P);
-
- DenseMap<Value *, std::vector<Value *> > LoadMoveSet;
- DenseSet<ValuePair> LoadMoveSetPairs;
- collectLoadMoveSet(BB, PairableInsts, ChosenPairs,
- LoadMoveSet, LoadMoveSetPairs);
-
- DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
-
- for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
- DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(&*PI);
- if (P == ChosenPairs.end()) {
- ++PI;
- continue;
- }
-
- if (getDepthFactor(P->first) == 0) {
- // These instructions are not really fused, but are tracked as though
- // they are. Any case in which it would be interesting to fuse them
- // will be taken care of by InstCombine.
- --NumFusedOps;
- ++PI;
- continue;
- }
-
- Instruction *I = cast<Instruction>(P->first),
- *J = cast<Instruction>(P->second);
-
- DEBUG(dbgs() << "BBV: fusing: " << *I <<
- " <-> " << *J << "\n");
-
- // Remove the pair and flipped pair from the list.
- DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second);
- assert(FP != ChosenPairs.end() && "Flipped pair not found in list");
- ChosenPairs.erase(FP);
- ChosenPairs.erase(P);
-
- if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) {
- DEBUG(dbgs() << "BBV: fusion of: " << *I <<
- " <-> " << *J <<
- " aborted because of non-trivial dependency cycle\n");
- --NumFusedOps;
- ++PI;
- continue;
- }
-
- // If the pair must have the other order, then flip it.
- bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I));
- if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) {
- // This pair does not have a fixed order, and so we might want to
- // flip it if that will yield fewer shuffles. We count the number
- // of dependencies connected via swaps, and those directly connected,
- // and flip the order if the number of swaps is greater.
- bool OrigOrder = true;
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator IJ =
- ConnectedPairDeps.find(ValuePair(I, J));
- if (IJ == ConnectedPairDeps.end()) {
- IJ = ConnectedPairDeps.find(ValuePair(J, I));
- OrigOrder = false;
- }
-
- if (IJ != ConnectedPairDeps.end()) {
- unsigned NumDepsDirect = 0, NumDepsSwap = 0;
- for (std::vector<ValuePair>::iterator T = IJ->second.begin(),
- TE = IJ->second.end(); T != TE; ++T) {
- VPPair Q(IJ->first, *T);
- DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q.second, Q.first));
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- if (R->second == PairConnectionDirect)
- ++NumDepsDirect;
- else if (R->second == PairConnectionSwap)
- ++NumDepsSwap;
- }
-
- if (!OrigOrder)
- std::swap(NumDepsDirect, NumDepsSwap);
-
- if (NumDepsSwap > NumDepsDirect) {
- FlipPairOrder = true;
- DEBUG(dbgs() << "BBV: reordering pair: " << *I <<
- " <-> " << *J << "\n");
- }
- }
- }
-
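
The flip decision above boils down to counting how many incoming pair connections are direct versus swapped and flipping when the swapped ones win. A self-contained sketch of that heuristic, illustrative only:

#include <cstdio>
#include <utility>
#include <vector>

enum ConnectionType { PairConnectionDirect, PairConnectionSwap, PairConnectionSplat };

bool shouldFlipPair(const std::vector<ConnectionType> &DepTypes, bool OrigOrder) {
  unsigned NumDepsDirect = 0, NumDepsSwap = 0;
  for (ConnectionType T : DepTypes) {
    if (T == PairConnectionDirect)
      ++NumDepsDirect;
    else if (T == PairConnectionSwap)
      ++NumDepsSwap;
  }
  if (!OrigOrder)
    std::swap(NumDepsDirect, NumDepsSwap);
  // Flipping pays off when more dependencies arrive swapped than direct.
  return NumDepsSwap > NumDepsDirect;
}

int main() {
  std::vector<ConnectionType> Deps = {PairConnectionSwap, PairConnectionSwap,
                                      PairConnectionDirect};
  std::printf("flip=%d\n", shouldFlipPair(Deps, /*OrigOrder=*/true)); // flip=1
  return 0;
}
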
- Instruction *L = I, *H = J;
- if (FlipPairOrder)
- std::swap(H, L);
-
- // If the pair being fused uses the opposite order from that in the pair
- // connection map, then we need to flip the types.
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator HL =
- ConnectedPairs.find(ValuePair(H, L));
- if (HL != ConnectedPairs.end())
- for (std::vector<ValuePair>::iterator T = HL->second.begin(),
- TE = HL->second.end(); T != TE; ++T) {
- VPPair Q(HL->first, *T);
- DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(Q);
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- if (R->second == PairConnectionDirect)
- R->second = PairConnectionSwap;
- else if (R->second == PairConnectionSwap)
- R->second = PairConnectionDirect;
- }
-
- bool LBeforeH = !FlipPairOrder;
- unsigned NumOperands = I->getNumOperands();
- SmallVector<Value *, 3> ReplacedOperands(NumOperands);
- getReplacementInputsForPair(Context, L, H, ReplacedOperands,
- LBeforeH);
-
- // Make a copy of the original operation, change its type to the vector
- // type and replace its operands with the vector operands.
- Instruction *K = L->clone();
- if (L->hasName())
- K->takeName(L);
- else if (H->hasName())
- K->takeName(H);
-
- if (auto CS = CallSite(K)) {
- SmallVector<Type *, 3> Tys;
- FunctionType *Old = CS.getFunctionType();
- unsigned NumOld = Old->getNumParams();
- assert(NumOld <= ReplacedOperands.size());
- for (unsigned i = 0; i != NumOld; ++i)
- Tys.push_back(ReplacedOperands[i]->getType());
- CS.mutateFunctionType(
- FunctionType::get(getVecTypeForPair(L->getType(), H->getType()),
- Tys, Old->isVarArg()));
- } else if (!isa<StoreInst>(K))
- K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
-
- unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_group};
- combineMetadata(K, H, KnownIDs);
- K->andIRFlags(H);
-
- for (unsigned o = 0; o < NumOperands; ++o)
- K->setOperand(o, ReplacedOperands[o]);
-
- K->insertAfter(J);
-
- // Instruction insertion point:
- Instruction *InsertionPt = K;
- Instruction *K1 = nullptr, *K2 = nullptr;
- replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
-
- // The use dag of the first original instruction must be moved to after
- // the location of the second instruction. The entire use dag of the
- // first instruction is disjoint from the input dag of the second
- // (by definition), and so commutes with it.
-
- moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J);
-
- if (!isa<StoreInst>(I)) {
- L->replaceAllUsesWith(K1);
- H->replaceAllUsesWith(K2);
- }
-
- // Instructions that may read from memory may be in the load move set.
- // Once an instruction is fused, we no longer need its move set, and so
- // the values of the map never need to be updated. However, when a load
- // is fused, we need to merge the entries from both instructions in the
- // pair in case those instructions were in the move set of some other
- // yet-to-be-fused pair. The loads in question are the keys of the map.
- if (I->mayReadFromMemory()) {
- std::vector<ValuePair> NewSetMembers;
- DenseMap<Value *, std::vector<Value *> >::iterator II =
- LoadMoveSet.find(I);
- if (II != LoadMoveSet.end())
- for (std::vector<Value *>::iterator N = II->second.begin(),
- NE = II->second.end(); N != NE; ++N)
- NewSetMembers.push_back(ValuePair(K, *N));
- DenseMap<Value *, std::vector<Value *> >::iterator JJ =
- LoadMoveSet.find(J);
- if (JJ != LoadMoveSet.end())
- for (std::vector<Value *>::iterator N = JJ->second.begin(),
- NE = JJ->second.end(); N != NE; ++N)
- NewSetMembers.push_back(ValuePair(K, *N));
- for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(),
- AE = NewSetMembers.end(); A != AE; ++A) {
- LoadMoveSet[A->first].push_back(A->second);
- LoadMoveSetPairs.insert(*A);
- }
- }
-
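
Concretely, when a fused pair (I, J) is itself a pair of loads, the move-set entries keyed by I and J are re-keyed to the new instruction K so that later fusions still find them. A minimal sketch of that re-keying with std containers, not the DenseMap-based original:

#include <cstdio>
#include <map>
#include <vector>

struct Inst {};
using MoveMap = std::map<Inst *, std::vector<Inst *>>;

void remapFusedLoad(MoveMap &LoadMoveSet, Inst *I, Inst *J, Inst *K) {
  std::vector<Inst *> Merged;
  auto II = LoadMoveSet.find(I);
  if (II != LoadMoveSet.end())
    Merged.insert(Merged.end(), II->second.begin(), II->second.end());
  auto JJ = LoadMoveSet.find(J);
  if (JJ != LoadMoveSet.end())
    Merged.insert(Merged.end(), JJ->second.begin(), JJ->second.end());
  // The fused load K inherits every dependence either original load had.
  for (Inst *Dep : Merged)
    LoadMoveSet[K].push_back(Dep);
}

int main() {
  Inst I, J, K, A, B;
  MoveMap LoadMoveSet;
  LoadMoveSet[&I] = {&A};
  LoadMoveSet[&J] = {&B};
  remapFusedLoad(LoadMoveSet, &I, &J, &K);
  std::printf("K now depends on %zu pair members\n", LoadMoveSet[&K].size()); // 2
  return 0;
}
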
- // Before removing I, set the iterator to the next instruction.
- PI = std::next(BasicBlock::iterator(I));
- if (cast<Instruction>(PI) == J)
- ++PI;
-
- SE->forgetValue(I);
- SE->forgetValue(J);
- I->eraseFromParent();
- J->eraseFromParent();
-
- DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" <<
- BB << "\n");
- }
-
- DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
- }
-}
-
-char BBVectorize::ID = 0;
-static const char bb_vectorize_name[] = "Basic-Block Vectorization";
-INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
-INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
-
-BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
- return new BBVectorize(C);
-}
-
-bool
-llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
- BBVectorize BBVectorizer(P, *BB.getParent(), C);
- return BBVectorizer.vectorizeBB(BB);
-}
-
-//===----------------------------------------------------------------------===//
-VectorizeConfig::VectorizeConfig() {
- VectorBits = ::VectorBits;
- VectorizeBools = !::NoBools;
- VectorizeInts = !::NoInts;
- VectorizeFloats = !::NoFloats;
- VectorizePointers = !::NoPointers;
- VectorizeCasts = !::NoCasts;
- VectorizeMath = !::NoMath;
- VectorizeBitManipulations = !::NoBitManipulation;
- VectorizeFMA = !::NoFMA;
- VectorizeSelect = !::NoSelect;
- VectorizeCmp = !::NoCmp;
- VectorizeGEP = !::NoGEP;
- VectorizeMemOps = !::NoMemOps;
- AlignedOnly = ::AlignedOnly;
- ReqChainDepth= ::ReqChainDepth;
- SearchLimit = ::SearchLimit;
- MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
- SplatBreaksChain = ::SplatBreaksChain;
- MaxInsts = ::MaxInsts;
- MaxPairs = ::MaxPairs;
- MaxIter = ::MaxIter;
- Pow2LenOnly = ::Pow2LenOnly;
- NoMemOpBoost = ::NoMemOpBoost;
- FastDep = ::FastDep;
-}
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
index 395f440bda47..1aea73cd4a32 100644
--- a/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -1,5 +1,4 @@
add_llvm_library(LLVMVectorize
- BBVectorize.cpp
LoadStoreVectorizer.cpp
LoopVectorize.cpp
SLPVectorizer.cpp
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index eac2867233bc..193cc4d13787 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -114,12 +114,13 @@ static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
-/// We don't vectorize loops with a known constant trip count below this number.
+/// Loops with a known constant trip count below this number are vectorized only
+/// if no scalar iteration overheads are incurred.
static cl::opt<unsigned> TinyTripCountVectorThreshold(
"vectorizer-min-trip-count", cl::init(16), cl::Hidden,
- cl::desc("Don't vectorize loops with a constant "
- "trip count that is smaller than this "
- "value."));
+ cl::desc("Loops with a constant trip count that is smaller than this "
+ "value are vectorized only if no scalar iteration overheads "
+ "are incurred."));
static cl::opt<bool> MaximizeBandwidth(
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
@@ -532,21 +533,34 @@ protected:
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
- /// Return a constant reference to the VectorParts corresponding to \p V from
- /// the original loop. If the value has already been vectorized, the
- /// corresponding vector entry in VectorLoopValueMap is returned. If,
+ /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a
+ /// vector or scalar value on-demand if one is not yet available. When
+ /// vectorizing a loop, we visit the definition of an instruction before its
+ /// uses. When visiting the definition, we either vectorize or scalarize the
+ /// instruction, creating an entry for it in the corresponding map. (In some
+ /// cases, such as induction variables, we will create both vector and scalar
+ /// entries.) Then, as we encounter uses of the definition, we derive values
+ /// for each scalar or vector use unless such a value is already available.
+ /// For example, if we scalarize a definition and one of its uses is vector,
+ /// we build the required vector on-demand with an insertelement sequence
+ /// when visiting the use. Otherwise, if the use is scalar, we can use the
+ /// existing scalar definition.
+ ///
+ /// Return a value in the new loop corresponding to \p V from the original
+ /// loop at unroll index \p Part. If the value has already been vectorized,
+ /// the corresponding vector entry in VectorLoopValueMap is returned. If,
/// however, the value has a scalar entry in VectorLoopValueMap, we construct
- /// new vector values on-demand by inserting the scalar values into vectors
+ /// a new vector value on-demand by inserting the scalar values into a vector
/// with an insertelement sequence. If the value has been neither vectorized
/// nor scalarized, it must be loop invariant, so we simply broadcast the
- /// value into vectors.
- const VectorParts &getVectorValue(Value *V);
+ /// value into a vector.
+ Value *getOrCreateVectorValue(Value *V, unsigned Part);
/// Return a value in the new loop corresponding to \p V from the original
/// loop at unroll index \p Part and vector index \p Lane. If the value has
/// been vectorized but not scalarized, the necessary extractelement
/// instruction will be generated.
- Value *getScalarValue(Value *V, unsigned Part, unsigned Lane);
+ Value *getOrCreateScalarValue(Value *V, unsigned Part, unsigned Lane);
/// Try to vectorize the interleaved access group that \p Instr belongs to.
void vectorizeInterleaveGroup(Instruction *Instr);
@@ -601,90 +615,103 @@ protected:
/// UF x VF scalar values in the new loop. UF and VF are the unroll and
/// vectorization factors, respectively.
///
- /// Entries can be added to either map with initVector and initScalar, which
- /// initialize and return a constant reference to the new entry. If a
- /// non-constant reference to a vector entry is required, getVector can be
- /// used to retrieve a mutable entry. We currently directly modify the mapped
- /// values during "fix-up" operations that occur once the first phase of
- /// widening is complete. These operations include type truncation and the
- /// second phase of recurrence widening.
+ /// Entries can be added to either map with setVectorValue and setScalarValue,
+ /// which assert that an entry was not already added before. If an entry is to
+ /// replace an existing one, call resetVectorValue. This is currently needed
+ /// to modify the mapped values during "fix-up" operations that occur once the
+ /// first phase of widening is complete. These operations include type
+ /// truncation and the second phase of recurrence widening.
///
- /// Otherwise, entries from either map should be accessed using the
- /// getVectorValue or getScalarValue functions from InnerLoopVectorizer.
- /// getVectorValue and getScalarValue coordinate to generate a vector or
- /// scalar value on-demand if one is not yet available. When vectorizing a
- /// loop, we visit the definition of an instruction before its uses. When
- /// visiting the definition, we either vectorize or scalarize the
- /// instruction, creating an entry for it in the corresponding map. (In some
- /// cases, such as induction variables, we will create both vector and scalar
- /// entries.) Then, as we encounter uses of the definition, we derive values
- /// for each scalar or vector use unless such a value is already available.
- /// For example, if we scalarize a definition and one of its uses is vector,
- /// we build the required vector on-demand with an insertelement sequence
- /// when visiting the use. Otherwise, if the use is scalar, we can use the
- /// existing scalar definition.
+ /// Entries from either map can be retrieved using the getVectorValue and
+ /// getScalarValue functions, which assert that the desired value exists.
+
struct ValueMap {
/// Construct an empty map with the given unroll and vectorization factors.
- ValueMap(unsigned UnrollFactor, unsigned VecWidth)
- : UF(UnrollFactor), VF(VecWidth) {
- // The unroll and vectorization factors are only used in asserts builds
- // to verify map entries are sized appropriately.
- (void)UF;
- (void)VF;
+ ValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}
+
+ /// \return True if the map has any vector entry for \p Key.
+ bool hasAnyVectorValue(Value *Key) const {
+ return VectorMapStorage.count(Key);
+ }
+
+ /// \return True if the map has a vector entry for \p Key and \p Part.
+ bool hasVectorValue(Value *Key, unsigned Part) const {
+ assert(Part < UF && "Queried Vector Part is too large.");
+ if (!hasAnyVectorValue(Key))
+ return false;
+ const VectorParts &Entry = VectorMapStorage.find(Key)->second;
+ assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
+ return Entry[Part] != nullptr;
}
- /// \return True if the map has a vector entry for \p Key.
- bool hasVector(Value *Key) const { return VectorMapStorage.count(Key); }
-
- /// \return True if the map has a scalar entry for \p Key.
- bool hasScalar(Value *Key) const { return ScalarMapStorage.count(Key); }
-
- /// \brief Map \p Key to the given VectorParts \p Entry, and return a
- /// constant reference to the new vector map entry. The given key should
- /// not already be in the map, and the given VectorParts should be
- /// correctly sized for the current unroll factor.
- const VectorParts &initVector(Value *Key, const VectorParts &Entry) {
- assert(!hasVector(Key) && "Vector entry already initialized");
- assert(Entry.size() == UF && "VectorParts has wrong dimensions");
- VectorMapStorage[Key] = Entry;
- return VectorMapStorage[Key];
+ /// \return True if the map has any scalar entry for \p Key.
+ bool hasAnyScalarValue(Value *Key) const {
+ return ScalarMapStorage.count(Key);
}
- /// \brief Map \p Key to the given ScalarParts \p Entry, and return a
- /// constant reference to the new scalar map entry. The given key should
- /// not already be in the map, and the given ScalarParts should be
- /// correctly sized for the current unroll and vectorization factors.
- const ScalarParts &initScalar(Value *Key, const ScalarParts &Entry) {
- assert(!hasScalar(Key) && "Scalar entry already initialized");
- assert(Entry.size() == UF &&
- all_of(make_range(Entry.begin(), Entry.end()),
- [&](const SmallVectorImpl<Value *> &Values) -> bool {
- return Values.size() == VF;
- }) &&
- "ScalarParts has wrong dimensions");
- ScalarMapStorage[Key] = Entry;
- return ScalarMapStorage[Key];
+ /// \return True if the map has a scalar entry for \p Key, \p Part and
+ /// \p Lane.
+ bool hasScalarValue(Value *Key, unsigned Part, unsigned Lane) const {
+ assert(Part < UF && "Queried Scalar Part is too large.");
+ assert(Lane < VF && "Queried Scalar Lane is too large.");
+ if (!hasAnyScalarValue(Key))
+ return false;
+ const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
+ assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
+ assert(Entry[Part].size() == VF && "ScalarParts has wrong dimensions.");
+ return Entry[Part][Lane] != nullptr;
}
- /// \return A reference to the vector map entry corresponding to \p Key.
- /// The key should already be in the map. This function should only be used
- /// when it's necessary to update values that have already been vectorized.
- /// This is the case for "fix-up" operations including type truncation and
- /// the second phase of recurrence vectorization. If a non-const reference
- /// isn't required, getVectorValue should be used instead.
- VectorParts &getVector(Value *Key) {
- assert(hasVector(Key) && "Vector entry not initialized");
- return VectorMapStorage.find(Key)->second;
+ /// Retrieve the existing vector value that corresponds to \p Key and
+ /// \p Part.
+ Value *getVectorValue(Value *Key, unsigned Part) {
+ assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
+ return VectorMapStorage[Key][Part];
}
- /// Retrieve an entry from the vector or scalar maps. The preferred way to
- /// access an existing mapped entry is with getVectorValue or
- /// getScalarValue from InnerLoopVectorizer. Until those functions can be
- /// moved inside ValueMap, we have to declare them as friends.
- friend const VectorParts &InnerLoopVectorizer::getVectorValue(Value *V);
- friend Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part,
- unsigned Lane);
+ /// Retrieve the existing scalar value that corresponds to \p Key, \p Part
+ /// and \p Lane.
+ Value *getScalarValue(Value *Key, unsigned Part, unsigned Lane) {
+ assert(hasScalarValue(Key, Part, Lane) && "Getting non-existent value.");
+ return ScalarMapStorage[Key][Part][Lane];
+ }
+
+ /// Set a vector value associated with \p Key and \p Part. Assumes such a
+ /// value is not already set. If it is, use resetVectorValue() instead.
+ void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
+ assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
+ if (!VectorMapStorage.count(Key)) {
+ VectorParts Entry(UF);
+ VectorMapStorage[Key] = Entry;
+ }
+ VectorMapStorage[Key][Part] = Vector;
+ }
+
+ /// Set a scalar value associated with \p Key for \p Part and \p Lane.
+ /// Assumes such a value is not already set.
+ void setScalarValue(Value *Key, unsigned Part, unsigned Lane,
+ Value *Scalar) {
+ assert(!hasScalarValue(Key, Part, Lane) && "Scalar value already set");
+ if (!ScalarMapStorage.count(Key)) {
+ ScalarParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part].resize(VF, nullptr);
+ // TODO: Consider storing uniform values only per-part, as they occupy
+ // lane 0 only, keeping the other VF-1 redundant entries null.
+ ScalarMapStorage[Key] = Entry;
+ }
+ ScalarMapStorage[Key][Part][Lane] = Scalar;
+ }
+
+ /// Reset the vector value associated with \p Key for the given \p Part.
+ /// This function can be used to update values that have already been
+ /// vectorized. This is the case for "fix-up" operations including type
+ /// truncation and the second phase of recurrence vectorization.
+ void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
+ assert(hasVectorValue(Key, Part) && "Vector value not set for part");
+ VectorMapStorage[Key][Part] = Vector;
+ }
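
The new per-part interface above can be summarized with a tiny stand-in class: set once, read with get, overwrite only through reset. This sketch uses std::map and a dummy Value type purely for illustration; it is not the patch's code:

#include <cassert>
#include <cstdio>
#include <map>
#include <vector>

struct Value {};

struct MiniValueMap {
  unsigned UF, VF;
  std::map<Value *, std::vector<Value *>> VectorMapStorage;

  MiniValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}

  bool hasVectorValue(Value *Key, unsigned Part) const {
    assert(Part < UF && "queried part is too large");
    auto It = VectorMapStorage.find(Key);
    return It != VectorMapStorage.end() && It->second[Part] != nullptr;
  }
  void setVectorValue(Value *Key, unsigned Part, Value *V) {
    assert(!hasVectorValue(Key, Part) && "use resetVectorValue to overwrite");
    auto &Entry = VectorMapStorage[Key];
    Entry.resize(UF, nullptr);
    Entry[Part] = V;
  }
  void resetVectorValue(Value *Key, unsigned Part, Value *V) {
    assert(hasVectorValue(Key, Part) && "nothing to overwrite");
    VectorMapStorage[Key][Part] = V;
  }
  Value *getVectorValue(Value *Key, unsigned Part) {
    assert(hasVectorValue(Key, Part) && "getting a value that was never set");
    return VectorMapStorage[Key][Part];
  }
};

int main() {
  Value K, V0, V1;
  MiniValueMap Map(/*UF=*/2, /*VF=*/4);
  Map.setVectorValue(&K, 0, &V0);
  Map.setVectorValue(&K, 1, &V1);
  Map.resetVectorValue(&K, 1, &V0); // a "fix-up" style overwrite
  std::printf("part 1 now equals part 0: %d\n", Map.getVectorValue(&K, 1) == &V0);
  return 0;
}
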
private:
/// The unroll factor. Each entry in the vector map contains UF vector
@@ -1577,6 +1604,9 @@ public:
/// Return the first-order recurrences found in the loop.
RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
+ /// Return the set of instructions to sink to handle first-order recurrences.
+ DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
+
/// Returns the widest induction type.
Type *getWidestInductionType() { return WidestIndTy; }
@@ -1779,6 +1809,9 @@ private:
InductionList Inductions;
/// Holds the phi nodes that are first-order recurrences.
RecurrenceSet FirstOrderRecurrences;
+ /// Holds instructions that need to sink past other instructions to handle
+ /// first-order recurrences.
+ DenseMap<Instruction *, Instruction *> SinkAfter;
/// Holds the widest induction type encountered.
Type *WidestIndTy;
@@ -2417,15 +2450,13 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
&*LoopVectorBody->getFirstInsertionPt());
Instruction *LastInduction = VecInd;
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part] = LastInduction;
+ VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
+ if (isa<TruncInst>(EntryVal))
+ addMetadata(LastInduction, EntryVal);
LastInduction = cast<Instruction>(addFastMathFlag(
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
}
- VectorLoopValueMap.initVector(EntryVal, Entry);
- if (isa<TruncInst>(EntryVal))
- addMetadata(Entry, EntryVal);
// Move the last step to the end of the latch block. This ensures consistent
// placement of all induction updates.
@@ -2531,13 +2562,13 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
// induction variable, and build the necessary step vectors.
if (!VectorizedIV) {
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
- VectorParts Entry(UF);
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] =
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *EntryPart =
getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
- VectorLoopValueMap.initVector(EntryVal, Entry);
- if (Trunc)
- addMetadata(Entry, Trunc);
+ VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
+ if (Trunc)
+ addMetadata(EntryPart, Trunc);
+ }
}
// If an induction variable is only used for counting loop iterations or
@@ -2637,17 +2668,14 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF) ? 1 : VF;
// Compute the scalar steps and save the results in VectorLoopValueMap.
- ScalarParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part].resize(VF);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
auto *StartIdx = getSignedIntOrFpConstant(ScalarIVTy, VF * Part + Lane);
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
- Entry[Part][Lane] = Add;
+ VectorLoopValueMap.setScalarValue(EntryVal, Part, Lane, Add);
}
}
- VectorLoopValueMap.initScalar(EntryVal, Entry);
}
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
@@ -2665,8 +2693,7 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
return LAI->isUniform(V);
}
-const InnerLoopVectorizer::VectorParts &
-InnerLoopVectorizer::getVectorValue(Value *V) {
+Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
assert(V != Induction && "The new induction variable should not be used.");
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
assert(!V->getType()->isVoidTy() && "Type does not produce a value");
@@ -2675,17 +2702,16 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
if (Legal->hasStride(V))
V = ConstantInt::get(V->getType(), 1);
- // If we have this scalar in the map, return it.
- if (VectorLoopValueMap.hasVector(V))
- return VectorLoopValueMap.VectorMapStorage[V];
+ // If we have a vector mapped to this value, return it.
+ if (VectorLoopValueMap.hasVectorValue(V, Part))
+ return VectorLoopValueMap.getVectorValue(V, Part);
// If the value has not been vectorized, check if it has been scalarized
// instead. If it has been scalarized, and we actually need the value in
// vector form, we will construct the vector values on demand.
- if (VectorLoopValueMap.hasScalar(V)) {
+ if (VectorLoopValueMap.hasAnyScalarValue(V)) {
- // Initialize a new vector map entry.
- VectorParts Entry(UF);
+ Value *ScalarValue = VectorLoopValueMap.getScalarValue(V, Part, 0);
// If we've scalarized a value, that value should be an instruction.
auto *I = cast<Instruction>(V);
@@ -2693,17 +2719,17 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
// If we aren't vectorizing, we can just copy the scalar map values over to
// the vector map.
if (VF == 1) {
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = getScalarValue(V, Part, 0);
- return VectorLoopValueMap.initVector(V, Entry);
+ VectorLoopValueMap.setVectorValue(V, Part, ScalarValue);
+ return ScalarValue;
}
- // Get the last scalar instruction we generated for V. If the value is
- // known to be uniform after vectorization, this corresponds to lane zero
- // of the last unroll iteration. Otherwise, the last instruction is the one
- // we created for the last vector lane of the last unroll iteration.
+ // Get the last scalar instruction we generated for V and Part. If the value
+ // is known to be uniform after vectorization, this corresponds to lane zero
+ // of the Part unroll iteration. Otherwise, the last instruction is the one
+ // we created for the last vector lane of the Part unroll iteration.
unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF - 1;
- auto *LastInst = cast<Instruction>(getScalarValue(V, UF - 1, LastLane));
+ auto *LastInst =
+ cast<Instruction>(VectorLoopValueMap.getScalarValue(V, Part, LastLane));
// Set the insert point after the last scalarized instruction. This ensures
// the insertelement sequence will directly follow the scalar definitions.
@@ -2717,52 +2743,50 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
// iteration. Otherwise, we construct the vector values using insertelement
// instructions. Since the resulting vectors are stored in
// VectorLoopValueMap, we will only generate the insertelements once.
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *VectorValue = nullptr;
- if (Cost->isUniformAfterVectorization(I, VF)) {
- VectorValue = getBroadcastInstrs(getScalarValue(V, Part, 0));
- } else {
- VectorValue = UndefValue::get(VectorType::get(V->getType(), VF));
- for (unsigned Lane = 0; Lane < VF; ++Lane)
- VectorValue = Builder.CreateInsertElement(
- VectorValue, getScalarValue(V, Part, Lane),
- Builder.getInt32(Lane));
- }
- Entry[Part] = VectorValue;
+ Value *VectorValue = nullptr;
+ if (Cost->isUniformAfterVectorization(I, VF)) {
+ VectorValue = getBroadcastInstrs(ScalarValue);
+ } else {
+ VectorValue = UndefValue::get(VectorType::get(V->getType(), VF));
+ for (unsigned Lane = 0; Lane < VF; ++Lane)
+ VectorValue = Builder.CreateInsertElement(
+ VectorValue, getOrCreateScalarValue(V, Part, Lane),
+ Builder.getInt32(Lane));
}
+ VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
Builder.restoreIP(OldIP);
- return VectorLoopValueMap.initVector(V, Entry);
+ return VectorValue;
}
// If this scalar is unknown, assume that it is a constant or that it is
// loop invariant. Broadcast V and save the value for future uses.
Value *B = getBroadcastInstrs(V);
- return VectorLoopValueMap.initVector(V, VectorParts(UF, B));
+ VectorLoopValueMap.setVectorValue(V, Part, B);
+ return B;
}
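
The get-or-create flow of this function (return the cached vector part, otherwise rebuild it from the per-lane scalars, otherwise broadcast a loop-invariant value) can be mimicked without any LLVM types. A rough sketch with strings standing in for IR values, illustrative only:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

std::map<std::string, std::vector<std::string>> VectorMap;              // key -> per-part vector
std::map<std::string, std::vector<std::vector<std::string>>> ScalarMap; // key -> per-part, per-lane

std::string getOrCreateVectorValue(const std::string &V, unsigned Part, unsigned VF) {
  auto It = VectorMap.find(V);
  if (It != VectorMap.end() && It->second.size() > Part && !It->second[Part].empty())
    return It->second[Part];               // already vectorized for this part
  std::string Vec;
  auto SI = ScalarMap.find(V);
  if (SI != ScalarMap.end()) {
    // Rebuild a vector from this part's scalars (an insertelement chain in IR).
    for (unsigned Lane = 0; Lane < VF; ++Lane)
      Vec += SI->second[Part][Lane] + (Lane + 1 < VF ? "," : "");
    Vec = "<" + Vec + ">";
  } else {
    // Unknown value: assume constant or loop invariant and broadcast it.
    Vec = "splat(" + V + ")";
  }
  auto &Entry = VectorMap[V];
  if (Entry.size() <= Part)
    Entry.resize(Part + 1);
  Entry[Part] = Vec;                        // cache so the work happens once
  return Vec;
}

int main() {
  ScalarMap["x"] = {{"x0", "x1"}};          // UF = 1, VF = 2 scalar entries
  std::printf("%s\n", getOrCreateVectorValue("x", 0, 2).c_str()); // <x0,x1>
  std::printf("%s\n", getOrCreateVectorValue("c", 0, 2).c_str()); // splat(c)
  return 0;
}
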
-Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part,
- unsigned Lane) {
+Value *InnerLoopVectorizer::getOrCreateScalarValue(Value *V, unsigned Part,
+ unsigned Lane) {
// If the value is not an instruction contained in the loop, it should
// already be scalar.
if (OrigLoop->isLoopInvariant(V))
return V;
- assert(Lane > 0 ?
- !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
- : true && "Uniform values only have lane zero");
+ assert(Lane > 0 ? !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
+ : true && "Uniform values only have lane zero");
// If the value from the original loop has not been vectorized, it is
// represented by UF x VF scalar values in the new loop. Return the requested
// scalar value.
- if (VectorLoopValueMap.hasScalar(V))
- return VectorLoopValueMap.ScalarMapStorage[V][Part][Lane];
+ if (VectorLoopValueMap.hasScalarValue(V, Part, Lane))
+ return VectorLoopValueMap.getScalarValue(V, Part, Lane);
// If the value has not been scalarized, get its entry in VectorLoopValueMap
// for the given unroll part. If this entry is not a vector type (i.e., the
// vectorization factor is one), there is no need to generate an
// extractelement instruction.
- auto *U = getVectorValue(V)[Part];
+ auto *U = getOrCreateVectorValue(V, Part);
if (!U->getType()->isVectorTy()) {
assert(VF == 1 && "Value not scalarized has non-vector type");
return U;
@@ -2844,7 +2868,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Index += (VF - 1) * Group->getFactor();
for (unsigned Part = 0; Part < UF; Part++) {
- Value *NewPtr = getScalarValue(Ptr, Part, 0);
+ Value *NewPtr = getOrCreateScalarValue(Ptr, Part, 0);
// Notice current instruction could be any index. Need to adjust the address
// to the member of index 0.
@@ -2887,7 +2911,6 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
if (!Member)
continue;
- VectorParts Entry(UF);
Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF);
for (unsigned Part = 0; Part < UF; Part++) {
Value *StridedVec = Builder.CreateShuffleVector(
@@ -2899,10 +2922,11 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy);
}
- Entry[Part] =
- Group->isReverse() ? reverseVector(StridedVec) : StridedVec;
+ if (Group->isReverse())
+ StridedVec = reverseVector(StridedVec);
+
+ VectorLoopValueMap.setVectorValue(Member, Part, StridedVec);
}
- VectorLoopValueMap.initVector(Member, Entry);
}
return;
}
@@ -2919,8 +2943,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Instruction *Member = Group->getMember(i);
assert(Member && "Fail to get a member from an interleaved store group");
- Value *StoredVec =
- getVectorValue(cast<StoreInst>(Member)->getValueOperand())[Part];
+ Value *StoredVec = getOrCreateVectorValue(
+ cast<StoreInst>(Member)->getValueOperand(), Part);
if (Group->isReverse())
StoredVec = reverseVector(StoredVec);
@@ -2981,16 +3005,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
bool CreateGatherScatter =
(Decision == LoopVectorizationCostModel::CM_GatherScatter);
- VectorParts VectorGep;
+ // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector
+ // gather/scatter. Otherwise Decision should have been to Scalarize.
+ assert((ConsecutiveStride || CreateGatherScatter) &&
+ "The instruction should be scalarized");
// Handle consecutive loads/stores.
- if (ConsecutiveStride) {
- Ptr = getScalarValue(Ptr, 0, 0);
- } else {
- // At this point we should have a vector version of the GEP for Gather or Scatter
- assert(CreateGatherScatter && "The instruction should be scalarized");
- VectorGep = getVectorValue(Ptr);
- }
+ if (ConsecutiveStride)
+ Ptr = getOrCreateScalarValue(Ptr, 0, 0);
VectorParts Mask = createBlockInMask(Instr->getParent());
// Handle Stores:
@@ -2998,16 +3020,15 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
assert(!Legal->isUniform(SI->getPointerOperand()) &&
"We do not allow storing to uniform addresses");
setDebugLocFromInst(Builder, SI);
- // We don't want to update the value in the map as it might be used in
- // another expression. So don't use a reference type for "StoredVal".
- VectorParts StoredVal = getVectorValue(SI->getValueOperand());
for (unsigned Part = 0; Part < UF; ++Part) {
Instruction *NewSI = nullptr;
+ Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
if (CreateGatherScatter) {
Value *MaskPart = Legal->isMaskRequired(SI) ? Mask[Part] : nullptr;
- NewSI = Builder.CreateMaskedScatter(StoredVal[Part], VectorGep[Part],
- Alignment, MaskPart);
+ Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+ NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+ MaskPart);
} else {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
@@ -3016,7 +3037,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (Reverse) {
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
- StoredVal[Part] = reverseVector(StoredVal[Part]);
+ StoredVal = reverseVector(StoredVal);
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't call resetVectorValue(StoredVal).
+
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
PartPtr =
@@ -3030,11 +3054,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
if (Legal->isMaskRequired(SI))
- NewSI = Builder.CreateMaskedStore(StoredVal[Part], VecPtr, Alignment,
+ NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
Mask[Part]);
else
- NewSI =
- Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
+ NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}
addMetadata(NewSI, SI);
}
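
For the reverse-consecutive case above, two adjustments happen per part: the stored lanes are reversed, and the wide access must begin at the lowest-addressed of its VF elements. The offset arithmetic in this sketch (base minus Part*VF, then minus a further VF-1) is an assumption for illustration, since the exact GEP lines sit outside this hunk:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  const int VF = 4, Part = 1;
  // Assumed offset of the wide access relative to the scalar pointer of the
  // part's first iteration: step back over this part, then to its lowest lane.
  int Start = -Part * VF + (1 - VF);
  std::vector<int> StoredVal = {10, 11, 12, 13}; // values for decreasing addresses
  std::reverse(StoredVal.begin(), StoredVal.end());
  std::printf("wide store at offset %d, lanes:", Start);
  for (int V : StoredVal)
    std::printf(" %d", V);
  std::printf("\n"); // wide store at offset -7, lanes: 13 12 11 10
  return 0;
}
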
@@ -3044,14 +3067,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// Handle loads.
assert(LI && "Must have a load instruction");
setDebugLocFromInst(Builder, LI);
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Instruction *NewLI;
+ Value *NewLI;
if (CreateGatherScatter) {
Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr;
- NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart,
+ Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+ NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart,
nullptr, "wide.masked.gather");
- Entry[Part] = NewLI;
+ addMetadata(NewLI, LI);
} else {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
@@ -3073,11 +3096,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
"wide.masked.load");
else
NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
- Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
+
+ // Add metadata to the load itself; the value mapped via setVectorValue is the reverse shuffle.
+ addMetadata(NewLI, LI);
+ if (Reverse)
+ NewLI = reverseVector(NewLI);
}
- addMetadata(NewLI, LI);
+ VectorLoopValueMap.setVectorValue(Instr, Part, NewLI);
}
- VectorLoopValueMap.initVector(Instr, Entry);
}
void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
@@ -3094,9 +3120,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- // Initialize a new scalar map entry.
- ScalarParts Entry(UF);
-
VectorParts Cond;
if (IfPredicateInstr)
Cond = createBlockInMask(Instr->getParent());
@@ -3108,7 +3131,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part].resize(VF);
// For each scalar that we create:
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
@@ -3129,7 +3151,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
- auto *NewOp = getScalarValue(Instr->getOperand(op), Part, Lane);
+ auto *NewOp = getOrCreateScalarValue(Instr->getOperand(op), Part, Lane);
Cloned->setOperand(op, NewOp);
}
addNewMetadata(Cloned, Instr);
@@ -3138,7 +3160,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
Builder.Insert(Cloned);
// Add the cloned scalar to the scalar map entry.
- Entry[Part][Lane] = Cloned;
+ VectorLoopValueMap.setScalarValue(Instr, Part, Lane, Cloned);
// If we just cloned a new assumption, add it the assumption cache.
if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
@@ -3150,7 +3172,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
}
}
- VectorLoopValueMap.initScalar(Instr, Entry);
}
PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start,
@@ -3786,10 +3807,10 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from VectorLoopValueMap indicates that it
// wasn't vectorized.
- if (!VectorLoopValueMap.hasVector(KV.first))
+ if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
continue;
- VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
- for (Value *&I : Parts) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *I = getOrCreateVectorValue(KV.first, Part);
if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
continue;
Type *OriginalTy = I->getType();
@@ -3878,7 +3899,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
I->replaceAllUsesWith(Res);
cast<Instruction>(I)->eraseFromParent();
Erased.insert(I);
- I = Res;
+ VectorLoopValueMap.resetVectorValue(KV.first, Part, Res);
}
}
@@ -3887,15 +3908,15 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from VectorLoopValueMap indicates that it
// wasn't vectorized.
- if (!VectorLoopValueMap.hasVector(KV.first))
+ if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
continue;
- VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
- for (Value *&I : Parts) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *I = getOrCreateVectorValue(KV.first, Part);
ZExtInst *Inst = dyn_cast<ZExtInst>(I);
if (Inst && Inst->use_empty()) {
Value *NewI = Inst->getOperand(0);
Inst->eraseFromParent();
- I = NewI;
+ VectorLoopValueMap.resetVectorValue(KV.first, Part, NewI);
}
}
}
@@ -4025,28 +4046,29 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// We constructed a temporary phi node in the first phase of vectorization.
// This phi node will eventually be deleted.
- VectorParts &PhiParts = VectorLoopValueMap.getVector(Phi);
- Builder.SetInsertPoint(cast<Instruction>(PhiParts[0]));
+ Builder.SetInsertPoint(
+ cast<Instruction>(VectorLoopValueMap.getVectorValue(Phi, 0)));
// Create a phi node for the new recurrence. The current value will either be
// the initial value inserted into a vector or loop-varying vector value.
auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur");
VecPhi->addIncoming(VectorInit, LoopVectorPreHeader);
- // Get the vectorized previous value.
- auto &PreviousParts = getVectorValue(Previous);
+ // Get the vectorized previous value of the last part UF - 1. It appears last
+ // among all unrolled iterations, due to the order of their construction.
+ Value *PreviousLastPart = getOrCreateVectorValue(Previous, UF - 1);
// Set the insertion point after the previous value if it is an instruction.
// Note that the previous value may have been constant-folded so it is not
// guaranteed to be an instruction in the vector loop. Also, if the previous
// value is a phi node, we should insert after all the phi nodes to avoid
// breaking basic block verification.
- if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousParts[UF - 1]) ||
- isa<PHINode>(PreviousParts[UF - 1]))
+ if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart) ||
+ isa<PHINode>(PreviousLastPart))
Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt());
else
Builder.SetInsertPoint(
- &*++BasicBlock::iterator(cast<Instruction>(PreviousParts[UF - 1])));
+ &*++BasicBlock::iterator(cast<Instruction>(PreviousLastPart)));
// We will construct a vector for the recurrence by combining the values for
// the current and previous iterations. This is the required shuffle mask.
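
The splice mentioned in the comment above takes the last lane of the vector carrying the previous values and the first VF-1 lanes of the current part. The mask built in this sketch ({VF-1, VF, ..., 2*VF-2} over the two concatenated shuffle operands) is one such mask, shown only as an illustration since the construction itself is outside this hunk:

#include <cstdio>
#include <vector>

int main() {
  const unsigned VF = 4;
  std::vector<unsigned> ShuffleMask(VF);
  ShuffleMask[0] = VF - 1;            // last lane of the first operand (previous values)
  for (unsigned I = 1; I < VF; ++I)
    ShuffleMask[I] = VF + I - 1;      // lanes 0..VF-2 of the second operand (current part)
  for (unsigned M : ShuffleMask)
    std::printf("%u ", M);            // prints: 3 4 5 6
  std::printf("\n");
  return 0;
}
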
@@ -4061,15 +4083,16 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// Shuffle the current and previous vector and update the vector parts.
for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *PreviousPart = getOrCreateVectorValue(Previous, Part);
+ Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part);
auto *Shuffle =
- VF > 1
- ? Builder.CreateShuffleVector(Incoming, PreviousParts[Part],
- ConstantVector::get(ShuffleMask))
- : Incoming;
- PhiParts[Part]->replaceAllUsesWith(Shuffle);
- cast<Instruction>(PhiParts[Part])->eraseFromParent();
- PhiParts[Part] = Shuffle;
- Incoming = PreviousParts[Part];
+ VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart,
+ ConstantVector::get(ShuffleMask))
+ : Incoming;
+ PhiPart->replaceAllUsesWith(Shuffle);
+ cast<Instruction>(PhiPart)->eraseFromParent();
+ VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle);
+ Incoming = PreviousPart;
}
// Fix the latch value of the new recurrence in the vector loop.
@@ -4097,7 +4120,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// `Incoming`. This is analogous to the vectorized case above: extracting the
// second last element when VF > 1.
else if (UF > 1)
- ExtractForPhiUsedOutsideLoop = PreviousParts[UF - 2];
+ ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue(Previous, UF - 2);
// Fix the initial value of the original recurrence in the scalar loop.
Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
@@ -4148,8 +4171,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
// This is the vector-clone of the value that leaves the loop.
- const VectorParts &VectorExit = getVectorValue(LoopExitInst);
- Type *VecTy = VectorExit[0]->getType();
+ Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();
// Find the reduction identity variable. Zero for addition, or, xor,
// one for multiplication, -1 for And.
@@ -4187,18 +4209,17 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
- const VectorParts &VecRdxPhi = getVectorValue(Phi);
BasicBlock *Latch = OrigLoop->getLoopLatch();
Value *LoopVal = Phi->getIncomingValueForBlock(Latch);
- const VectorParts &Val = getVectorValue(LoopVal);
- for (unsigned part = 0; part < UF; ++part) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part);
+ Value *Val = getOrCreateVectorValue(LoopVal, Part);
// Make sure to add the reduction start value only to the
// first unroll part.
- Value *StartVal = (part == 0) ? VectorStart : Identity;
- cast<PHINode>(VecRdxPhi[part])
- ->addIncoming(StartVal, LoopVectorPreHeader);
- cast<PHINode>(VecRdxPhi[part])
- ->addIncoming(Val[part], LI->getLoopFor(LoopVectorBody)->getLoopLatch());
+ Value *StartVal = (Part == 0) ? VectorStart : Identity;
+ cast<PHINode>(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader);
+ cast<PHINode>(VecRdxPhi)
+ ->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
}
// Before each round, move the insertion point right between
@@ -4207,7 +4228,6 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// instructions.
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- VectorParts &RdxParts = VectorLoopValueMap.getVector(LoopExitInst);
setDebugLocFromInst(Builder, LoopExitInst);
// If the vector reduction can be performed in a smaller type, we truncate
@@ -4216,37 +4236,42 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
Builder.SetInsertPoint(LoopVectorBody->getTerminator());
- for (unsigned part = 0; part < UF; ++part) {
- Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
+ VectorParts RdxParts(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
+ Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
- : Builder.CreateZExt(Trunc, VecTy);
- for (Value::user_iterator UI = RdxParts[part]->user_begin();
- UI != RdxParts[part]->user_end();)
+ : Builder.CreateZExt(Trunc, VecTy);
+ for (Value::user_iterator UI = RdxParts[Part]->user_begin();
+ UI != RdxParts[Part]->user_end();)
if (*UI != Trunc) {
- (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd);
- RdxParts[part] = Extnd;
+ (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd);
+ RdxParts[Part] = Extnd;
} else {
++UI;
}
}
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- for (unsigned part = 0; part < UF; ++part)
- RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
+ VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, RdxParts[Part]);
+ }
}
// Reduce all of the unrolled parts into a single vector.
- Value *ReducedPartRdx = RdxParts[0];
+ Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0);
unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
setDebugLocFromInst(Builder, ReducedPartRdx);
- for (unsigned part = 1; part < UF; ++part) {
+ for (unsigned Part = 1; Part < UF; ++Part) {
+ Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
// Floating point operations had to be 'fast' to enable the reduction.
ReducedPartRdx = addFastMathFlag(
- Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part],
+ Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
ReducedPartRdx, "bin.rdx"));
else
ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp(
- Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]);
+ Builder, MinMaxKind, ReducedPartRdx, RdxPart);
}
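
The loop above folds the UF per-part reduction values into a single value with the recurrence's binary op (or a min/max). The same fold with plain integers and addition, purely as an illustration:

#include <cstdio>
#include <vector>

int main() {
  const unsigned UF = 4;
  std::vector<int> RdxParts = {3, 1, 4, 1};  // one partial reduction per unroll part
  int ReducedPartRdx = RdxParts[0];
  for (unsigned Part = 1; Part < UF; ++Part)
    ReducedPartRdx = ReducedPartRdx + RdxParts[Part]; // the "bin.rdx" chain
  std::printf("%d\n", ReducedPartRdx); // 9
  return 0;
}
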
if (VF > 1) {
@@ -4518,14 +4543,16 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
assert(BI && "Unexpected terminator found");
if (BI->isConditional()) {
- VectorParts EdgeMask = getVectorValue(BI->getCondition());
- if (BI->getSuccessor(0) != Dst)
- for (unsigned part = 0; part < UF; ++part)
- EdgeMask[part] = Builder.CreateNot(EdgeMask[part]);
+ VectorParts EdgeMask(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
+ if (BI->getSuccessor(0) != Dst)
+ EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
- for (unsigned part = 0; part < UF; ++part)
- EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
+ EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]);
+ EdgeMask[Part] = EdgeMaskPart;
+ }
EdgeMaskCache[Edge] = EdgeMask;
return EdgeMask;
@@ -4544,23 +4571,27 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
if (BCEntryIt != BlockMaskCache.end())
return BCEntryIt->second;
+ VectorParts BlockMask(UF);
+
// Loop incoming mask is all-one.
if (OrigLoop->getHeader() == BB) {
Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
- const VectorParts &BlockMask = getVectorValue(C);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockMask[Part] = getOrCreateVectorValue(C, Part);
BlockMaskCache[BB] = BlockMask;
return BlockMask;
}
// This is the block mask. We OR all incoming edges, and with zero.
Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0);
- VectorParts BlockMask = getVectorValue(Zero);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockMask[Part] = getOrCreateVectorValue(Zero, Part);
// For each pred:
- for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) {
- VectorParts EM = createEdgeMask(*it, BB);
- for (unsigned part = 0; part < UF; ++part)
- BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]);
+ for (pred_iterator It = pred_begin(BB), E = pred_end(BB); It != E; ++It) {
+ VectorParts EM = createEdgeMask(*It, BB);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EM[Part]);
}
BlockMaskCache[BB] = BlockMask;
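createEdgeMask and createBlockInMask now build one mask value per unroll part rather than fetching a whole VectorParts vector up front. A toy model of that flow, with plain bools standing in for the IR mask values and the Builder calls noted in comments (EdgeIsTaken is an assumed stand-in for BI->getSuccessor(0) == Dst), might look like this:

#include <vector>

using Mask = std::vector<bool>; // one entry per unroll part (UF)

// CreateNot + CreateAnd per part: negate the condition when this edge is the
// "false" successor, then AND with the source block's mask.
Mask makeEdgeMask(const Mask &Cond, const Mask &SrcMask, bool EdgeIsTaken) {
  Mask Edge(Cond.size());
  for (unsigned Part = 0; Part < Cond.size(); ++Part) {
    bool PartVal = EdgeIsTaken ? static_cast<bool>(Cond[Part]) : !Cond[Part];
    Edge[Part] = PartVal && SrcMask[Part];
  }
  return Edge;
}

// CreateOr per part: the block mask starts at zero and ORs in one edge mask
// per predecessor.
Mask makeBlockMask(const std::vector<Mask> &IncomingEdges, unsigned UF) {
  Mask Block(UF, false);
  for (const Mask &EM : IncomingEdges)
    for (unsigned Part = 0; Part < UF; ++Part)
      Block[Part] = Block[Part] || EM[Part];
  return Block;
}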
@@ -4575,15 +4606,14 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
// this value when we vectorize all of the instructions that use the PHI.
if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) {
- VectorParts Entry(UF);
- for (unsigned part = 0; part < UF; ++part) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
// This is phase one of vectorizing PHIs.
Type *VecTy =
(VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
- Entry[part] = PHINode::Create(
+ Value *EntryPart = PHINode::Create(
VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
+ VectorLoopValueMap.setVectorValue(P, Part, EntryPart);
}
- VectorLoopValueMap.initVector(P, Entry);
return;
}
@@ -4607,21 +4637,22 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
for (unsigned In = 0; In < NumIncoming; In++) {
VectorParts Cond =
createEdgeMask(P->getIncomingBlock(In), P->getParent());
- const VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
- for (unsigned part = 0; part < UF; ++part) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *In0 = getOrCreateVectorValue(P->getIncomingValue(In), Part);
// We might have single edge PHIs (blocks) - use an identity
// 'select' for the first PHI operand.
if (In == 0)
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In0[part]);
+ Entry[Part] = Builder.CreateSelect(Cond[Part], In0, In0);
else
// Select between the current value and the previous incoming edge
// based on the incoming mask.
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part], Entry[part],
+ Entry[Part] = Builder.CreateSelect(Cond[Part], In0, Entry[Part],
"predphi");
}
}
- VectorLoopValueMap.initVector(P, Entry);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ VectorLoopValueMap.setVectorValue(P, Part, Entry[Part]);
return;
}
@@ -4652,18 +4683,15 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF;
// These are the scalar results. Notice that we don't generate vector GEPs
// because scalar GEPs result in better code.
- ScalarParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part].resize(VF);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF);
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
SclrGep->setName("next.gep");
- Entry[Part][Lane] = SclrGep;
+ VectorLoopValueMap.setScalarValue(P, Part, Lane, SclrGep);
}
}
- VectorLoopValueMap.initScalar(P, Entry);
return;
}
}
@@ -4713,7 +4741,6 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// is vector-typed. Thus, to keep the representation compact, we only use
// vector-typed operands for loop-varying values.
auto *GEP = cast<GetElementPtrInst>(&I);
- VectorParts Entry(UF);
if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) {
// If we are vectorizing, but the GEP has only loop-invariant operands,
@@ -4729,8 +4756,11 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// collectLoopScalars() and teach getVectorValue() to broadcast
// the lane-zero scalar value.
auto *Clone = Builder.Insert(GEP->clone());
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = Builder.CreateVectorSplat(VF, Clone);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
+ VectorLoopValueMap.setVectorValue(&I, Part, EntryPart);
+ addMetadata(EntryPart, GEP);
+ }
} else {
// If the GEP has at least one loop-varying operand, we are sure to
// produce a vector of pointers. But if we are only unrolling, we want
@@ -4743,9 +4773,10 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// The pointer operand of the new GEP. If it's loop-invariant, we
// won't broadcast it.
- auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand())
- ? GEP->getPointerOperand()
- : getVectorValue(GEP->getPointerOperand())[Part];
+ auto *Ptr =
+ OrigLoop->isLoopInvariant(GEP->getPointerOperand())
+ ? GEP->getPointerOperand()
+ : getOrCreateVectorValue(GEP->getPointerOperand(), Part);
// Collect all the indices for the new GEP. If any index is
// loop-invariant, we won't broadcast it.
@@ -4754,7 +4785,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
if (OrigLoop->isLoopInvariant(U.get()))
Indices.push_back(U.get());
else
- Indices.push_back(getVectorValue(U.get())[Part]);
+ Indices.push_back(getOrCreateVectorValue(U.get(), Part));
}
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
@@ -4764,12 +4795,11 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
: Builder.CreateGEP(Ptr, Indices);
assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
- Entry[Part] = NewGEP;
+ VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
+ addMetadata(NewGEP, GEP);
}
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, GEP);
break;
}
case Instruction::UDiv:
@@ -4800,22 +4830,20 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// Just widen binops.
auto *BinOp = cast<BinaryOperator>(&I);
setDebugLocFromInst(Builder, BinOp);
- const VectorParts &A = getVectorValue(BinOp->getOperand(0));
- const VectorParts &B = getVectorValue(BinOp->getOperand(1));
- // Use this vector value for all users of the original instruction.
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
+ Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
+ Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
VecOp->copyIRFlags(BinOp);
- Entry[Part] = V;
+ // Use this vector value for all users of the original instruction.
+ VectorLoopValueMap.setVectorValue(&I, Part, V);
+ addMetadata(V, BinOp);
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, BinOp);
break;
}
case Instruction::Select: {
@@ -4831,20 +4859,19 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// loop. This means that we can't just use the original 'cond' value.
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
- const VectorParts &Cond = getVectorValue(I.getOperand(0));
- const VectorParts &Op0 = getVectorValue(I.getOperand(1));
- const VectorParts &Op1 = getVectorValue(I.getOperand(2));
- auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0);
+ auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), 0, 0);
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part] = Builder.CreateSelect(
- InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]);
+ Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part);
+ Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part);
+ Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part);
+ Value *Sel =
+ Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1);
+ VectorLoopValueMap.setVectorValue(&I, Part, Sel);
+ addMetadata(Sel, &I);
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
break;
}
@@ -4854,22 +4881,20 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
bool FCmp = (I.getOpcode() == Instruction::FCmp);
auto *Cmp = dyn_cast<CmpInst>(&I);
setDebugLocFromInst(Builder, Cmp);
- const VectorParts &A = getVectorValue(Cmp->getOperand(0));
- const VectorParts &B = getVectorValue(Cmp->getOperand(1));
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part);
+ Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part);
Value *C = nullptr;
if (FCmp) {
- C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
cast<FCmpInst>(C)->copyFastMathFlags(Cmp);
} else {
- C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
+ C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
}
- Entry[Part] = C;
+ VectorLoopValueMap.setVectorValue(&I, Part, C);
+ addMetadata(C, &I);
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
break;
}
@@ -4906,12 +4931,12 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
Type *DestTy =
(VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
- const VectorParts &A = getVectorValue(CI->getOperand(0));
- VectorParts Entry(UF);
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *A = getOrCreateVectorValue(CI->getOperand(0), Part);
+ Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ VectorLoopValueMap.setVectorValue(&I, Part, Cast);
+ addMetadata(Cast, &I);
+ }
break;
}
@@ -4949,17 +4974,14 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
break;
}
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
Value *Arg = CI->getArgOperand(i);
// Some intrinsics have a scalar argument - don't replace it with a
// vector.
- if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
- const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
- Arg = VectorArg[Part];
- }
+ if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i))
+ Arg = getOrCreateVectorValue(CI->getArgOperand(i), Part);
Args.push_back(Arg);
}
@@ -4992,11 +5014,10 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
if (isa<FPMathOperator>(V))
V->copyFastMathFlags(CI);
- Entry[Part] = V;
+ VectorLoopValueMap.setVectorValue(&I, Part, V);
+ addMetadata(V, &I);
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
break;
}
@@ -5363,7 +5384,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, DT)) {
+ if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
+ SinkAfter, DT)) {
FirstOrderRecurrences.insert(Phi);
continue;
}
@@ -7636,6 +7658,15 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV) {
// 2. Copy and widen instructions from the old loop into the new loop.
+ // Move instructions to handle first-order recurrences.
+ DenseMap<Instruction *, Instruction *> SinkAfter = Legal->getSinkAfter();
+ for (auto &Entry : SinkAfter) {
+ Entry.first->removeFromParent();
+ Entry.first->insertAfter(Entry.second);
+ DEBUG(dbgs() << "Sinking" << *Entry.first << " after" << *Entry.second
+ << " to vectorize a 1st order recurrence.\n");
+ }
+
// Collect instructions from the original loop that will become trivially dead
// in the vectorized loop. We don't need to vectorize these instructions. For
// example, original induction update instructions can become dead because we
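The SinkAfter loop added in executePlan moves each recorded instruction directly after its target so the first-order recurrence can be vectorized in program order; removeFromParent and insertAfter are the real Instruction calls used in the hunk. A standalone sketch of the same reordering on a plain std::list, with Instr as an illustrative stand-in for an instruction, could be:

#include <algorithm>
#include <iterator>
#include <list>
#include <map>
#include <string>

using Instr = std::string; // illustrative stand-in for an instruction

// For each (Sink, Target) pair, detach Sink and reinsert it right after
// Target, mirroring the removeFromParent()/insertAfter() calls above.
void sinkAfter(std::list<Instr> &Block,
               const std::map<Instr, Instr> &SinkAfter) {
  for (const auto &Entry : SinkAfter) {
    auto Sink = std::find(Block.begin(), Block.end(), Entry.first);
    auto Target = std::find(Block.begin(), Block.end(), Entry.second);
    if (Sink == Block.end() || Target == Block.end() || Sink == Target)
      continue;
    Instr Moved = *Sink;
    Block.erase(Sink);                      // removeFromParent()
    Block.insert(std::next(Target), Moved); // insertAfter(Target)
  }
}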
@@ -7787,8 +7818,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- // Check the loop for a trip count threshold:
- // do not vectorize loops with a tiny trip count.
+ PredicatedScalarEvolution PSE(*SE, *L);
+
+ // Check if it is legal to vectorize the loop.
+ LoopVectorizationRequirements Requirements(*ORE);
+ LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
+ &Requirements, &Hints);
+ if (!LVL.canVectorize()) {
+ DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
+ emitMissedWarning(F, L, Hints, ORE);
+ return false;
+ }
+
+ // Check the function attributes to find out if this function should be
+ // optimized for size.
+ bool OptForSize =
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
+
+ // Check the loop for a trip count threshold: vectorize loops with a tiny trip
+ // count by optimizing for size, to minimize overheads.
unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L);
bool HasExpectedTC = (ExpectedTC > 0);
@@ -7802,36 +7850,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
- << "This loop is not worth vectorizing.");
+ << "This loop is worth vectorizing only if no scalar "
+ << "iteration overheads are incurred.");
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
DEBUG(dbgs() << "\n");
- ORE->emit(createMissedAnalysis(Hints.vectorizeAnalysisPassName(),
- "NotBeneficial", L)
- << "vectorization is not beneficial "
- "and is not explicitly forced");
- return false;
+ // Loops with a very small trip count are considered for vectorization
+ // under OptForSize, thereby making sure the cost of their loop body is
+ // dominant, free of runtime guards and scalar iteration overheads.
+ OptForSize = true;
}
}
- PredicatedScalarEvolution PSE(*SE, *L);
-
- // Check if it is legal to vectorize the loop.
- LoopVectorizationRequirements Requirements(*ORE);
- LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
- &Requirements, &Hints);
- if (!LVL.canVectorize()) {
- DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
- emitMissedWarning(F, L, Hints, ORE);
- return false;
- }
-
- // Check the function attributes to find out if this function should be
- // optimized for size.
- bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
-
// Check the function attributes to see if implicit floats are allowed.
// FIXME: This check doesn't seem possibly correct -- what if the loop is
// an integer loop and the vector instructions selected are purely integer
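With the legality and size checks reordered as above, a loop with a known tiny trip count is no longer rejected outright; it is vectorized under OptForSize instead, so no runtime guards or scalar epilogue overheads are added. The decision reduces to a small predicate, sketched here with stand-in inputs (ForceEnabled, FnOptsForSize and ExpectedTC model the Hints, F and SE queries in the hunk; the threshold constant is illustrative only):

// ForceEnabled models Hints.getForce() == FK_Enabled, FnOptsForSize models
// F->optForSize(), ExpectedTC models SE->getSmallConstantMaxTripCount(L)
// with 0 meaning unknown. The threshold value is illustrative.
constexpr unsigned TinyTripCountThreshold = 16;

bool decideOptForSize(bool ForceEnabled, bool FnOptsForSize,
                      unsigned ExpectedTC) {
  bool OptForSize = !ForceEnabled && FnOptsForSize;
  if (ExpectedTC > 0 && ExpectedTC < TinyTripCountThreshold && !ForceEnabled)
    OptForSize = true; // vectorize, but avoid guards and scalar epilogues
  return OptForSize;
}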
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b267230d3185..b494526369d6 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -173,6 +173,11 @@ static unsigned getAltOpcode(unsigned Op) {
}
}
+/// \returns true if \p Value is odd, false otherwise.
+static bool isOdd(unsigned Value) {
+ return Value & 1;
+}
+
///\returns bool representing if Opcode \p Op can be part
/// of an alternate sequence which can later be merged as
/// a ShuffleVector instruction.
@@ -190,7 +195,7 @@ static unsigned isAltInst(ArrayRef<Value *> VL) {
unsigned AltOpcode = getAltOpcode(Opcode);
for (int i = 1, e = VL.size(); i < e; i++) {
Instruction *I = dyn_cast<Instruction>(VL[i]);
- if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode))
+ if (!I || I->getOpcode() != (isOdd(i) ? AltOpcode : Opcode))
return 0;
}
return Instruction::ShuffleVector;
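isOdd is only a named wrapper around the low-bit test, but it makes the alternating-opcode scan above read as intended: even lanes must carry the main opcode and odd lanes the alternate one. A compact model of that scan, with opcodes reduced to plain unsigned values, is:

#include <vector>

static bool isOdd(unsigned Value) { return Value & 1; }

// Lane 0 fixes the main opcode; every odd lane must match AltOpcode and every
// even lane Opcode for the bundle to count as an alternating sequence.
bool isAlternatingSequence(const std::vector<unsigned> &Opcodes,
                           unsigned Opcode, unsigned AltOpcode) {
  for (unsigned I = 1; I < Opcodes.size(); ++I)
    if (Opcodes[I] != (isOdd(I) ? AltOpcode : Opcode))
      return false;
  return true;
}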
@@ -504,7 +509,7 @@ private:
Last->NeedToGather = !Vectorized;
if (Vectorized) {
for (int i = 0, e = VL.size(); i != e; ++i) {
- assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!");
+ assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
ScalarToTreeEntry[VL[i]] = idx;
}
} else {
@@ -521,6 +526,20 @@ private:
/// Holds all of the tree entries.
std::vector<TreeEntry> VectorizableTree;
+ TreeEntry *getTreeEntry(Value *V) {
+ auto I = ScalarToTreeEntry.find(V);
+ if (I != ScalarToTreeEntry.end())
+ return &VectorizableTree[I->second];
+ return nullptr;
+ }
+
+ const TreeEntry *getTreeEntry(Value *V) const {
+ auto I = ScalarToTreeEntry.find(V);
+ if (I != ScalarToTreeEntry.end())
+ return &VectorizableTree[I->second];
+ return nullptr;
+ }
+
/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value*, int> ScalarToTreeEntry;
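The new getTreeEntry accessor collapses the repeated count-then-index lookups on ScalarToTreeEntry into a single find, and the hunks that follow switch every call site over to it. A trimmed-down, self-contained version of the pattern, reduced to the members visible in this diff, is:

#include <map>
#include <vector>

struct Value;                                 // stand-in for llvm::Value
struct TreeEntry { bool NeedToGather = false; };

struct Tree {
  std::vector<TreeEntry> VectorizableTree;
  std::map<Value *, int> ScalarToTreeEntry;   // simplified scalar-to-index map

  TreeEntry *getTreeEntry(Value *V) {
    auto It = ScalarToTreeEntry.find(V);
    return It != ScalarToTreeEntry.end() ? &VectorizableTree[It->second]
                                         : nullptr;
  }
};

// Old call sites: if (ScalarToTreeEntry.count(V)) { int Idx = ...; ... }
// New call sites: if (TreeEntry *E = T.getTreeEntry(V)) { /* use E */ }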
@@ -1048,14 +1067,14 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
for (TreeEntry &EIdx : VectorizableTree) {
TreeEntry *Entry = &EIdx;
+ // No need to handle users of gathered values.
+ if (Entry->NeedToGather)
+ continue;
+
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
- // No need to handle users of gathered values.
- if (Entry->NeedToGather)
- continue;
-
// Check if the scalar is externally used as an extra arg.
auto ExtI = ExternallyUsedValues.find(Scalar);
if (ExtI != ExternallyUsedValues.end()) {
@@ -1072,9 +1091,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
continue;
// Skip in-tree scalars that become vectors
- if (ScalarToTreeEntry.count(U)) {
- int Idx = ScalarToTreeEntry[U];
- TreeEntry *UseEntry = &VectorizableTree[Idx];
+ if (TreeEntry *UseEntry = getTreeEntry(U)) {
Value *UseScalar = UseEntry->Scalars[0];
// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in Lane 0 will
@@ -1083,7 +1100,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
!InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
- assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
+ assert(!UseEntry->NeedToGather && "Bad state");
continue;
}
}
@@ -1156,9 +1173,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Check if this is a duplicate of another entry.
- if (ScalarToTreeEntry.count(VL[0])) {
- int Idx = ScalarToTreeEntry[VL[0]];
- TreeEntry *E = &VectorizableTree[Idx];
+ if (TreeEntry *E = getTreeEntry(VL[0])) {
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
if (E->Scalars[i] != VL[i]) {
@@ -1997,7 +2012,7 @@ int BoUpSLP::getSpillCost() {
// Update LiveValues.
LiveValues.erase(PrevInst);
for (auto &J : PrevInst->operands()) {
- if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J))
+ if (isa<Instruction>(&*J) && getTreeEntry(&*J))
LiveValues.insert(cast<Instruction>(&*J));
}
@@ -2393,9 +2408,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
CSEBlocks.insert(Insrt->getParent());
// Add to our 'need-to-extract' list.
- if (ScalarToTreeEntry.count(VL[i])) {
- int Idx = ScalarToTreeEntry[VL[i]];
- TreeEntry *E = &VectorizableTree[Idx];
+ if (TreeEntry *E = getTreeEntry(VL[i])) {
// Find which lane we need to extract.
int FoundLane = -1;
for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) {
@@ -2415,11 +2428,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
}
Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
- SmallDenseMap<Value*, int>::const_iterator Entry
- = ScalarToTreeEntry.find(VL[0]);
- if (Entry != ScalarToTreeEntry.end()) {
- int Idx = Entry->second;
- const TreeEntry *En = &VectorizableTree[Idx];
+ if (const TreeEntry *En = getTreeEntry(VL[0])) {
if (En->isSame(VL) && En->VectorizedValue)
return En->VectorizedValue;
}
@@ -2427,12 +2436,9 @@ Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
}
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
- if (ScalarToTreeEntry.count(VL[0])) {
- int Idx = ScalarToTreeEntry[VL[0]];
- TreeEntry *E = &VectorizableTree[Idx];
+ if (TreeEntry *E = getTreeEntry(VL[0]))
if (E->isSame(VL))
return vectorizeTree(E);
- }
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
@@ -2667,9 +2673,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar so we add the new BitCast to
// ExternalUses list to make sure that an extract will be generated in the
// future.
- if (ScalarToTreeEntry.count(LI->getPointerOperand()))
- ExternalUses.push_back(
- ExternalUser(LI->getPointerOperand(), cast<User>(VecPtr), 0));
+ Value *PO = LI->getPointerOperand();
+ if (getTreeEntry(PO))
+ ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
@@ -2700,9 +2706,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar so we add the new BitCast to
// ExternalUses list to make sure that an extract will be generated in the
// future.
- if (ScalarToTreeEntry.count(SI->getPointerOperand()))
- ExternalUses.push_back(
- ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
+ Value *PO = SI->getPointerOperand();
+ if (getTreeEntry(PO))
+ ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
if (!Alignment) {
Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
@@ -2783,7 +2789,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The scalar argument uses an in-tree scalar so we add the new vectorized
// call to ExternalUses list to make sure that an extract will be
// generated in the future.
- if (ScalarArg && ScalarToTreeEntry.count(ScalarArg))
+ if (ScalarArg && getTreeEntry(ScalarArg))
ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
E->VectorizedValue = V;
@@ -2819,7 +2825,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned e = E->Scalars.size();
SmallVector<Constant *, 8> Mask(e);
for (unsigned i = 0; i < e; ++i) {
- if (i & 1) {
+ if (isOdd(i)) {
Mask[i] = Builder.getInt32(e + i);
OddScalars.push_back(E->Scalars[i]);
} else {
@@ -2897,10 +2903,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// has multiple uses of the same value.
if (User && !is_contained(Scalar->users(), User))
continue;
- assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar");
-
- int Idx = ScalarToTreeEntry[Scalar];
- TreeEntry *E = &VectorizableTree[Idx];
+ TreeEntry *E = getTreeEntry(Scalar);
+ assert(E && "Invalid scalar");
assert(!E->NeedToGather && "Extracting from a gather list");
Value *Vec = E->VectorizedValue;
@@ -2986,7 +2990,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
for (User *U : Scalar->users()) {
DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
- assert((ScalarToTreeEntry.count(U) ||
+ assert((getTreeEntry(U) ||
// It is legal to replace users in the ignorelist by undef.
is_contained(UserIgnoreList, U)) &&
"Replacing out-of-tree value with undef");
@@ -3449,7 +3453,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
I = I->getNextNode()) {
ScheduleData *SD = BS->getScheduleData(I);
assert(
- SD->isPartOfBundle() == (ScalarToTreeEntry.count(SD->Inst) != 0) &&
+ SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr) &&
"scheduler and vectorizer have different opinion on what is a bundle");
SD->FirstInBundle->SchedulingPriority = Idx++;
if (SD->isSchedulingEntity()) {
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index a21928317888..fb2f509dcbaa 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -26,7 +26,6 @@ using namespace llvm;
/// initializeVectorizationPasses - Initialize all passes linked into the
/// Vectorization library.
void llvm::initializeVectorization(PassRegistry &Registry) {
- initializeBBVectorizePass(Registry);
initializeLoopVectorizePass(Registry);
initializeSLPVectorizerPass(Registry);
initializeLoadStoreVectorizerPass(Registry);
@@ -36,8 +35,8 @@ void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
initializeVectorization(*unwrap(R));
}
+// DEPRECATED: Remove after the LLVM 5 release.
void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createBBVectorizePass());
}
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {