author     Dimitry Andric <dim@FreeBSD.org>  2017-06-01 20:58:36 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-06-01 20:58:36 +0000
commit     f382538d471e38a9b98f016c4caebd24c8d60b62 (patch)
tree       d30f3d58b1044b5355d50c17a6a96c6a0b35703a /lib/Target
parent     ee2f195dd3e40f49698ca4dc2666ec09c770e80d (diff)
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedM1.td | 340
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 6
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 55
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 10
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetMachine.h | 5
-rw-r--r--  lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 324
-rw-r--r--  lib/Target/AMDGPU/DSInstructions.td | 6
-rw-r--r--  lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 106
-rw-r--r--  lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 2
-rw-r--r--  lib/Target/AMDGPU/SIDefines.h | 40
-rw-r--r--  lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 26
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.td | 13
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.h | 6
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 13
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h | 6
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 132
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 3
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 3
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 9
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 9
-rw-r--r--  lib/Target/AVR/AVRISelLowering.cpp | 4
-rw-r--r--  lib/Target/AVR/AVRInstrInfo.td | 2
-rw-r--r--  lib/Target/AVR/AVRTargetMachine.cpp | 4
-rw-r--r--  lib/Target/AVR/AVRTargetMachine.h | 4
-rw-r--r--  lib/Target/BPF/BPFTargetMachine.cpp | 4
-rw-r--r--  lib/Target/BPF/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 189
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 170
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 5
-rw-r--r--  lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 8
-rw-r--r--  lib/Target/Hexagon/HexagonPatterns.td | 59
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 49
-rw-r--r--  lib/Target/Lanai/LanaiTargetMachine.cpp | 4
-rw-r--r--  lib/Target/Lanai/LanaiTargetMachine.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 4
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.cpp | 24
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 3
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 298
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 110
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 8
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 12
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 5
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 20
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.h | 1
-rw-r--r--  lib/Target/RISCV/RISCVTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 4
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 4
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 2
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp | 4
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp | 4
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 34
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 9
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 266
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 1
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 5
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 4
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 4
74 files changed, 1829 insertions(+), 683 deletions(-)
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 4af5fef4287cc..abe28460c83ab 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -190,6 +190,7 @@ def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -226,6 +227,7 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
FeatureCRC,
FeatureCrypto,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeaturePerfMon
]>;
diff --git a/lib/Target/AArch64/AArch64SchedM1.td b/lib/Target/AArch64/AArch64SchedM1.td
index 3fbbc0be682d7..3b71cf8399a0d 100644
--- a/lib/Target/AArch64/AArch64SchedM1.td
+++ b/lib/Target/AArch64/AArch64SchedM1.td
@@ -23,7 +23,7 @@ def ExynosM1Model : SchedMachineModel {
let LoopMicroOpBufferSize = 24; // Based on the instruction queue size.
let LoadLatency = 4; // Optimistic load cases.
let MispredictPenalty = 14; // Minimum branch misprediction penalty.
- let CompleteModel = 0; // Use the default model otherwise.
+ let CompleteModel = 1; // Use the default model otherwise.
}
//===----------------------------------------------------------------------===//
@@ -72,14 +72,14 @@ def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
-def M1WriteLA : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteL5,
+def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteL5,
M1WriteA1]>,
SchedVar<NoSchedPred, [M1WriteL5]>]>;
def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
def M1WriteS2 : SchedWriteRes<[M1UnitS]> { let Latency = 2; }
def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
-def M1WriteSA : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteS2,
+def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteS2,
M1WriteA1]>,
SchedVar<NoSchedPred, [M1WriteS1]>]>;
@@ -125,13 +125,13 @@ def : WriteRes<WriteAdr, []> { let Latency = 0; }
// Load instructions.
def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
def : WriteRes<WriteLDHi, [M1UnitALU]> { let Latency = 4; }
-def : SchedAlias<WriteLDIdx, M1WriteLA>;
+def : SchedAlias<WriteLDIdx, M1WriteLX>;
// Store instructions.
def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; }
def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; }
def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; }
-def : SchedAlias<WriteSTIdx, M1WriteSA>;
+def : SchedAlias<WriteSTIdx, M1WriteSX>;
// FP data instructions.
def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
@@ -231,6 +231,111 @@ def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
def M1WriteTB : SchedWriteRes<[M1UnitC,
M1UnitALU]> { let Latency = 2; }
+def M1WriteVLDA : SchedWriteRes<[M1UnitL,
+ M1UnitL]> { let Latency = 6; }
+def M1WriteVLDB : SchedWriteRes<[M1UnitL,
+ M1UnitL,
+ M1UnitL]> { let Latency = 7; }
+def M1WriteVLDC : SchedWriteRes<[M1UnitL,
+ M1UnitL,
+ M1UnitL,
+ M1UnitL]> { let Latency = 8; }
+def M1WriteVLDD : SchedWriteRes<[M1UnitL,
+ M1UnitNALU]> { let Latency = 7;
+ let ResourceCycles = [2]; }
+def M1WriteVLDE : SchedWriteRes<[M1UnitL,
+ M1UnitNALU]> { let Latency = 6; }
+def M1WriteVLDF : SchedWriteRes<[M1UnitL,
+ M1UnitL]> { let Latency = 10;
+ let ResourceCycles = [5]; }
+def M1WriteVLDG : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 7;
+ let ResourceCycles = [2]; }
+def M1WriteVLDH : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 6; }
+def M1WriteVLDI : SchedWriteRes<[M1UnitL,
+ M1UnitL,
+ M1UnitL]> { let Latency = 12;
+ let ResourceCycles = [6]; }
+def M1WriteVLDJ : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 9;
+ let ResourceCycles = [4]; }
+def M1WriteVLDK : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 9;
+ let ResourceCycles = [4]; }
+def M1WriteVLDL : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 7;
+ let ResourceCycles = [2]; }
+def M1WriteVLDM : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 7;
+ let ResourceCycles = [2]; }
+def M1WriteVLDN : SchedWriteRes<[M1UnitL,
+ M1UnitL,
+ M1UnitL,
+ M1UnitL]> { let Latency = 14;
+ let ResourceCycles = [7]; }
+
+def M1WriteVSTA : WriteSequence<[WriteVST], 2>;
+def M1WriteVSTB : WriteSequence<[WriteVST], 3>;
+def M1WriteVSTC : WriteSequence<[WriteVST], 4>;
+def M1WriteVSTD : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 7;
+ let ResourceCycles = [7]; }
+def M1WriteVSTE : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 8;
+ let ResourceCycles = [8]; }
+def M1WriteVSTF : SchedWriteRes<[M1UnitNALU,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 15;
+ let ResourceCycles = [15]; }
+def M1WriteVSTG : SchedWriteRes<[M1UnitNALU,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 16;
+ let ResourceCycles = [16]; }
+def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 14;
+ let ResourceCycles = [14]; }
+def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 17;
+ let ResourceCycles = [17]; }
// Branch instructions
def : InstRW<[M1WriteB1], (instrs Bcc)>;
@@ -360,8 +465,233 @@ def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64
def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
// ASIMD load instructions.
+def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[M1WriteVLDD,
+ WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
+def : InstRW<[M1WriteVLDE,
+ WriteAdr], (instregex "LD1i(64)_POST$")>;
+
+def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
+def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>;
+def : InstRW<[M1WriteVLDG,
+ WriteAdr], (instregex "LD2i(8|16)_POST$")>;
+def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>;
+def : InstRW<[M1WriteVLDG,
+ WriteAdr], (instregex "LD2i(32)_POST$")>;
+def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>;
+def : InstRW<[M1WriteVLDH,
+ WriteAdr], (instregex "LD2i(64)_POST$")>;
+
+def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
+def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDF,
+ WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDF,
+ WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>;
+def : InstRW<[M1WriteVLDF,
+ WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>;
+def : InstRW<[M1WriteVLDJ,
+ WriteAdr], (instregex "LD3i(8|16)_POST$")>;
+def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>;
+def : InstRW<[M1WriteVLDJ,
+ WriteAdr], (instregex "LD3i(32)_POST$")>;
+def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>;
+def : InstRW<[M1WriteVLDL,
+ WriteAdr], (instregex "LD3i(64)_POST$")>;
+
+def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDI,
+ WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDI,
+ WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[M1WriteVLDI,
+ WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>;
+def : InstRW<[M1WriteVLDK,
+ WriteAdr], (instregex "LD4i(8|16)_POST$")>;
+def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>;
+def : InstRW<[M1WriteVLDK,
+ WriteAdr], (instregex "LD4i(32)_POST$")>;
+def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>;
+def : InstRW<[M1WriteVLDM,
+ WriteAdr], (instregex "LD4i(64)_POST$")>;
+
+def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDN,
+ WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDN,
+ WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[M1WriteVLDN,
+ WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
// ASIMD store instructions.
+def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
+def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST1i(64)_POST$")>;
+
+def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVSTA,
+ WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVSTA,
+ WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVSTB,
+ WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVSTB,
+ WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVSTC,
+ WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVSTC,
+ WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
+def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST2i(64)_POST$")>;
+
+def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVSTE,
+ WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>;
+def : InstRW<[M1WriteVSTE,
+ WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
+
+def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>;
+def : InstRW<[M1WriteVSTH,
+ WriteAdr], (instregex "ST3i(8|16)_POST$")>;
+def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>;
+def : InstRW<[M1WriteVSTH,
+ WriteAdr], (instregex "ST3i(32)_POST$")>;
+def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>;
+def : InstRW<[M1WriteVSTF,
+ WriteAdr], (instregex "ST3i(64)_POST$")>;
+
+def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVSTF,
+ WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVSTG,
+ WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[M1WriteVSTG,
+ WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>;
+def : InstRW<[M1WriteVSTH,
+ WriteAdr], (instregex "ST4i(8|16)_POST$")>;
+def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>;
+def : InstRW<[M1WriteVSTH,
+ WriteAdr], (instregex "ST4i(32)_POST$")>;
+def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>;
+def : InstRW<[M1WriteVSTF,
+ WriteAdr], (instregex "ST4i(64)_POST$")>;
+
+def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVSTF,
+ WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVSTI,
+ WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[M1WriteVSTI,
+ WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
// Cryptography instructions.
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index cb3f72a524f51..d4a8cecdb29f1 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -256,9 +256,9 @@ namespace {
/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
- AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
+ AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM.getOptLevel() != CodeGenOpt::None)
substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
}
@@ -317,7 +317,7 @@ TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
}
TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new AArch64PassConfig(this, PM);
+ return new AArch64PassConfig(*this, PM);
}
void AArch64PassConfig::addIRPasses() {
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index f473944cd5283..0959014812d8b 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -503,40 +503,37 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
Info.PrivateSegmentSize = FrameInfo.getStackSize();
- if (!FrameInfo.hasCalls()) {
- Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
- MRI.isPhysRegUsed(AMDGPU::VCC_HI);
-
- // If there are no calls, MachineRegisterInfo can tell us the used register
- // count easily.
-
- MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
- for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
- if (MRI.isPhysRegUsed(Reg)) {
- HighestVGPRReg = Reg;
- break;
- }
- }
- MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
- for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
- if (MRI.isPhysRegUsed(Reg)) {
- HighestSGPRReg = Reg;
- break;
- }
- }
+ Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
+ MRI.isPhysRegUsed(AMDGPU::VCC_HI);
- // We found the maximum register index. They start at 0, so add one to get the
- // number of registers.
- Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
- TRI.getHWRegIndex(HighestVGPRReg) + 1;
- Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
- TRI.getHWRegIndex(HighestSGPRReg) + 1;
+ // If there are no calls, MachineRegisterInfo can tell us the used register
+ // count easily.
- return Info;
+ MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ HighestVGPRReg = Reg;
+ break;
+ }
}
- llvm_unreachable("calls not implemented");
+ MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ HighestSGPRReg = Reg;
+ break;
+ }
+ }
+
+ // We found the maximum register index. They start at 0, so add one to get the
+ // number of registers.
+ Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
+ TRI.getHWRegIndex(HighestVGPRReg) + 1;
+ Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
+ TRI.getHWRegIndex(HighestSGPRReg) + 1;
+
+ return Info;
}
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 48827f4639974..596f02ae4a649 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -456,7 +456,7 @@ namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
- AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
+ AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
// Exceptions and StackMaps are not supported, so these passes will never do
// anything.
@@ -487,7 +487,7 @@ public:
class R600PassConfig final : public AMDGPUPassConfig {
public:
- R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
+ R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
: AMDGPUPassConfig(TM, PM) {}
ScheduleDAGInstrs *createMachineScheduler(
@@ -503,7 +503,7 @@ public:
class GCNPassConfig final : public AMDGPUPassConfig {
public:
- GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
+ GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
: AMDGPUPassConfig(TM, PM) {}
GCNTargetMachine &getGCNTargetMachine() const {
@@ -682,7 +682,7 @@ void R600PassConfig::addPreEmitPass() {
}
TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new R600PassConfig(this, PM);
+ return new R600PassConfig(*this, PM);
}
//===----------------------------------------------------------------------===//
@@ -844,6 +844,6 @@ void GCNPassConfig::addPreEmitPass() {
}
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new GCNPassConfig(this, PM);
+ return new GCNPassConfig(*this, PM);
}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 934bf7f31bab4..a3c7c1982d0a6 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -69,7 +69,6 @@ public:
return -1;
return 0;
}
-
};
//===----------------------------------------------------------------------===//
@@ -89,6 +88,10 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
const R600Subtarget *getSubtargetImpl(const Function &) const override;
+
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
};
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index f5541e08e1b72..cc68c971b2490 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -161,7 +161,8 @@ public:
ImmTyOpSel,
ImmTyOpSelHi,
ImmTyNegLo,
- ImmTyNegHi
+ ImmTyNegHi,
+ ImmTySwizzle
};
struct TokOp {
@@ -474,6 +475,7 @@ public:
bool isSWaitCnt() const;
bool isHwreg() const;
bool isSendMsg() const;
+ bool isSwizzle() const;
bool isSMRDOffset8() const;
bool isSMRDOffset20() const;
bool isSMRDLiteralOffset() const;
@@ -659,6 +661,7 @@ public:
case ImmTyOpSelHi: OS << "OpSelHi"; break;
case ImmTyNegLo: OS << "NegLo"; break;
case ImmTyNegHi: OS << "NegHi"; break;
+ case ImmTySwizzle: OS << "Swizzle"; break;
}
}
@@ -994,6 +997,12 @@ private:
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
+ bool trySkipId(const StringRef Id);
+ bool trySkipToken(const AsmToken::TokenKind Kind);
+ bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
+ bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
+ bool parseExpr(int64_t &Imm);
+
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
@@ -1003,6 +1012,19 @@ public:
OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
+ bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
+ const unsigned MinVal,
+ const unsigned MaxVal,
+ const StringRef ErrMsg);
+ OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
+ bool parseSwizzleOffset(int64_t &Imm);
+ bool parseSwizzleMacro(int64_t &Imm);
+ bool parseSwizzleQuadPerm(int64_t &Imm);
+ bool parseSwizzleBitmaskPerm(int64_t &Imm);
+ bool parseSwizzleBroadcast(int64_t &Imm);
+ bool parseSwizzleSwap(int64_t &Imm);
+ bool parseSwizzleReverse(int64_t &Imm);
+
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
@@ -2785,7 +2807,13 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
OptionalIdx[Op.getImmTy()] = i;
}
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
+ AMDGPUOperand::ImmTy OffsetType =
+ (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
+ Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
+ AMDGPUOperand::ImmTyOffset;
+
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
+
if (!IsGdsHardcoded) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
}
@@ -3384,6 +3412,298 @@ bool AMDGPUOperand::isSendMsg() const {
}
//===----------------------------------------------------------------------===//
+// parser helpers
+//===----------------------------------------------------------------------===//
+
+bool
+AMDGPUAsmParser::trySkipId(const StringRef Id) {
+ if (getLexer().getKind() == AsmToken::Identifier &&
+ Parser.getTok().getString() == Id) {
+ Parser.Lex();
+ return true;
+ }
+ return false;
+}
+
+bool
+AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
+ if (getLexer().getKind() == Kind) {
+ Parser.Lex();
+ return true;
+ }
+ return false;
+}
+
+bool
+AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
+ const StringRef ErrMsg) {
+ if (!trySkipToken(Kind)) {
+ Error(Parser.getTok().getLoc(), ErrMsg);
+ return false;
+ }
+ return true;
+}
+
+bool
+AMDGPUAsmParser::parseExpr(int64_t &Imm) {
+ return !getParser().parseAbsoluteExpression(Imm);
+}
+
+bool
+AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
+ SMLoc S = Parser.getTok().getLoc();
+ if (getLexer().getKind() == AsmToken::String) {
+ Val = Parser.getTok().getStringContents();
+ Parser.Lex();
+ return true;
+ } else {
+ Error(S, ErrMsg);
+ return false;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// swizzle
+//===----------------------------------------------------------------------===//
+
+LLVM_READNONE
+static unsigned
+encodeBitmaskPerm(const unsigned AndMask,
+ const unsigned OrMask,
+ const unsigned XorMask) {
+ using namespace llvm::AMDGPU::Swizzle;
+
+ return BITMASK_PERM_ENC |
+ (AndMask << BITMASK_AND_SHIFT) |
+ (OrMask << BITMASK_OR_SHIFT) |
+ (XorMask << BITMASK_XOR_SHIFT);
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
+ const unsigned MinVal,
+ const unsigned MaxVal,
+ const StringRef ErrMsg) {
+ for (unsigned i = 0; i < OpNum; ++i) {
+ if (!skipToken(AsmToken::Comma, "expected a comma")){
+ return false;
+ }
+ SMLoc ExprLoc = Parser.getTok().getLoc();
+ if (!parseExpr(Op[i])) {
+ return false;
+ }
+ if (Op[i] < MinVal || Op[i] > MaxVal) {
+ Error(ExprLoc, ErrMsg);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
+ using namespace llvm::AMDGPU::Swizzle;
+
+ int64_t Lane[LANE_NUM];
+ if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
+ "expected a 2-bit lane id")) {
+ Imm = QUAD_PERM_ENC;
+ for (auto i = 0; i < LANE_NUM; ++i) {
+ Imm |= Lane[i] << (LANE_SHIFT * i);
+ }
+ return true;
+ }
+ return false;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
+ using namespace llvm::AMDGPU::Swizzle;
+
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t GroupSize;
+ int64_t LaneIdx;
+
+ if (!parseSwizzleOperands(1, &GroupSize,
+ 2, 32,
+ "group size must be in the interval [2,32]")) {
+ return false;
+ }
+ if (!isPowerOf2_64(GroupSize)) {
+ Error(S, "group size must be a power of two");
+ return false;
+ }
+ if (parseSwizzleOperands(1, &LaneIdx,
+ 0, GroupSize - 1,
+ "lane id must be in the interval [0,group size - 1]")) {
+ Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
+ return true;
+ }
+ return false;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
+ using namespace llvm::AMDGPU::Swizzle;
+
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t GroupSize;
+
+ if (!parseSwizzleOperands(1, &GroupSize,
+ 2, 32, "group size must be in the interval [2,32]")) {
+ return false;
+ }
+ if (!isPowerOf2_64(GroupSize)) {
+ Error(S, "group size must be a power of two");
+ return false;
+ }
+
+ Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
+ return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
+ using namespace llvm::AMDGPU::Swizzle;
+
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t GroupSize;
+
+ if (!parseSwizzleOperands(1, &GroupSize,
+ 1, 16, "group size must be in the interval [1,16]")) {
+ return false;
+ }
+ if (!isPowerOf2_64(GroupSize)) {
+ Error(S, "group size must be a power of two");
+ return false;
+ }
+
+ Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
+ return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
+ using namespace llvm::AMDGPU::Swizzle;
+
+ if (!skipToken(AsmToken::Comma, "expected a comma")) {
+ return false;
+ }
+
+ StringRef Ctl;
+ SMLoc StrLoc = Parser.getTok().getLoc();
+ if (!parseString(Ctl)) {
+ return false;
+ }
+ if (Ctl.size() != BITMASK_WIDTH) {
+ Error(StrLoc, "expected a 5-character mask");
+ return false;
+ }
+
+ unsigned AndMask = 0;
+ unsigned OrMask = 0;
+ unsigned XorMask = 0;
+
+ for (size_t i = 0; i < Ctl.size(); ++i) {
+ unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
+ switch(Ctl[i]) {
+ default:
+ Error(StrLoc, "invalid mask");
+ return false;
+ case '0':
+ break;
+ case '1':
+ OrMask |= Mask;
+ break;
+ case 'p':
+ AndMask |= Mask;
+ break;
+ case 'i':
+ AndMask |= Mask;
+ XorMask |= Mask;
+ break;
+ }
+ }
+
+ Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
+ return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
+
+ SMLoc OffsetLoc = Parser.getTok().getLoc();
+
+ if (!parseExpr(Imm)) {
+ return false;
+ }
+ if (!isUInt<16>(Imm)) {
+ Error(OffsetLoc, "expected a 16-bit offset");
+ return false;
+ }
+ return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
+ using namespace llvm::AMDGPU::Swizzle;
+
+ if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
+
+ SMLoc ModeLoc = Parser.getTok().getLoc();
+ bool Ok = false;
+
+ if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
+ Ok = parseSwizzleQuadPerm(Imm);
+ } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
+ Ok = parseSwizzleBitmaskPerm(Imm);
+ } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
+ Ok = parseSwizzleBroadcast(Imm);
+ } else if (trySkipId(IdSymbolic[ID_SWAP])) {
+ Ok = parseSwizzleSwap(Imm);
+ } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
+ Ok = parseSwizzleReverse(Imm);
+ } else {
+ Error(ModeLoc, "expected a swizzle mode");
+ }
+
+ return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
+ }
+
+ return false;
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t Imm = 0;
+
+ if (trySkipId("offset")) {
+
+ bool Ok = false;
+ if (skipToken(AsmToken::Colon, "expected a colon")) {
+ if (trySkipId("swizzle")) {
+ Ok = parseSwizzleMacro(Imm);
+ } else {
+ Ok = parseSwizzleOffset(Imm);
+ }
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
+
+ return Ok? MatchOperand_Success : MatchOperand_ParseFail;
+ } else {
+ return MatchOperand_NoMatch;
+ }
+}
+
+bool
+AMDGPUOperand::isSwizzle() const {
+ return isImmTy(ImmTySwizzle);
+}
+
+//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//
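For reference, parseSwizzleQuadPerm above packs four 2-bit lane ids into the
low byte of the ds_swizzle_b32 offset immediate, under the QUAD_PERM mode bit.
A minimal standalone sketch of that packing follows; encodeQuadPerm and main
are hypothetical helpers, not part of this patch, and the constants mirror the
Swizzle::EncBits values added to SIDefines.h later in this diff.

// Standalone sketch (not part of the patch): how parseSwizzleQuadPerm packs
// four 2-bit lane ids into the ds_swizzle_b32 offset immediate.
#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr unsigned QUAD_PERM_ENC = 0x8000; // from Swizzle::EncBits
constexpr unsigned LANE_MAX = 0x3;
constexpr unsigned LANE_SHIFT = 2;
constexpr unsigned LANE_NUM = 4;

// Equivalent of the loop in parseSwizzleQuadPerm: lane i lands at
// bits [2*i+1 : 2*i], below the QUAD_PERM mode bit.
uint16_t encodeQuadPerm(const unsigned (&Lane)[LANE_NUM]) {
  uint16_t Imm = QUAD_PERM_ENC;
  for (unsigned i = 0; i < LANE_NUM; ++i) {
    assert(Lane[i] <= LANE_MAX && "expected a 2-bit lane id");
    Imm |= Lane[i] << (LANE_SHIFT * i);
  }
  return Imm;
}

int main() {
  // "offset:swizzle(QUAD_PERM, 3, 2, 1, 0)" reverses the lanes of each quad.
  unsigned Lane[LANE_NUM] = {3, 2, 1, 0};
  printf("0x%04x\n", (unsigned)encodeQuadPerm(Lane)); // prints 0x801b
  return 0;
}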
diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td
index 357e18108e7e8..fc516c3b39c28 100644
--- a/lib/Target/AMDGPU/DSInstructions.td
+++ b/lib/Target/AMDGPU/DSInstructions.td
@@ -145,10 +145,10 @@ class DS_1A2D_Off8_RET<string opName,
let hasPostISelHook = 1;
}
-class DS_1A_RET<string opName, RegisterClass rc = VGPR_32>
+class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, Operand ofs = offset>
: DS_Pseudo<opName,
(outs rc:$vdst),
- (ins VGPR_32:$addr, offset:$offset, gds:$gds),
+ (ins VGPR_32:$addr, ofs:$offset, gds:$gds),
"$vdst, $addr$offset$gds"> {
let has_data0 = 0;
@@ -440,7 +440,7 @@ def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
-def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32">;
+def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, SwizzleImm>;
}
let mayStore = 0 in {
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index a817ff3cbaf09..523eea41897ea 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -1160,6 +1160,112 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
O << SImm16; // Unknown simm16 code.
}
+static void printSwizzleBitmask(const uint16_t AndMask,
+ const uint16_t OrMask,
+ const uint16_t XorMask,
+ raw_ostream &O) {
+ using namespace llvm::AMDGPU::Swizzle;
+
+ uint16_t Probe0 = ((0 & AndMask) | OrMask) ^ XorMask;
+ uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;
+
+ O << "\"";
+
+ for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) {
+ uint16_t p0 = Probe0 & Mask;
+ uint16_t p1 = Probe1 & Mask;
+
+ if (p0 == p1) {
+ if (p0 == 0) {
+ O << "0";
+ } else {
+ O << "1";
+ }
+ } else {
+ if (p0 == 0) {
+ O << "p";
+ } else {
+ O << "i";
+ }
+ }
+ }
+
+ O << "\"";
+}
+
+void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ using namespace llvm::AMDGPU::Swizzle;
+
+ uint16_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm == 0) {
+ return;
+ }
+
+ O << " offset:";
+
+ if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
+
+ O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
+ for (auto i = 0; i < LANE_NUM; ++i) {
+ O << ",";
+ O << formatDec(Imm & LANE_MASK);
+ Imm >>= LANE_SHIFT;
+ }
+ O << ")";
+
+ } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) {
+
+ uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
+ uint16_t OrMask = (Imm >> BITMASK_OR_SHIFT) & BITMASK_MASK;
+ uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;
+
+ if (AndMask == BITMASK_MAX &&
+ OrMask == 0 &&
+ countPopulation(XorMask) == 1) {
+
+ O << "swizzle(" << IdSymbolic[ID_SWAP];
+ O << ",";
+ O << formatDec(XorMask);
+ O << ")";
+
+ } else if (AndMask == BITMASK_MAX &&
+ OrMask == 0 && XorMask > 0 &&
+ isPowerOf2_64(XorMask + 1)) {
+
+ O << "swizzle(" << IdSymbolic[ID_REVERSE];
+ O << ",";
+ O << formatDec(XorMask + 1);
+ O << ")";
+
+ } else {
+
+ uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
+ if (GroupSize > 1 &&
+ isPowerOf2_64(GroupSize) &&
+ OrMask < GroupSize &&
+ XorMask == 0) {
+
+ O << "swizzle(" << IdSymbolic[ID_BROADCAST];
+ O << ",";
+ O << formatDec(GroupSize);
+ O << ",";
+ O << formatDec(OrMask);
+ O << ")";
+
+ } else {
+ O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM];
+ O << ",";
+ printSwizzleBitmask(AndMask, OrMask, XorMask, O);
+ O << ")";
+ }
+ }
+ } else {
+ printU16ImmDecOperand(MI, OpNo, O);
+ }
+}
+
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index c0b8e5c510893..c8094c4b840a1 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -193,6 +193,8 @@ private:
raw_ostream &O);
void printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printSwizzle(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index 80967edee0ab1..5cd90323ff67b 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -281,6 +281,46 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
} // namespace Hwreg
+namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
+
+enum Id { // id of symbolic names
+ ID_QUAD_PERM = 0,
+ ID_BITMASK_PERM,
+ ID_SWAP,
+ ID_REVERSE,
+ ID_BROADCAST
+};
+
+enum EncBits {
+
+ // swizzle mode encodings
+
+ QUAD_PERM_ENC = 0x8000,
+ QUAD_PERM_ENC_MASK = 0xFF00,
+
+ BITMASK_PERM_ENC = 0x0000,
+ BITMASK_PERM_ENC_MASK = 0x8000,
+
+ // QUAD_PERM encodings
+
+ LANE_MASK = 0x3,
+ LANE_MAX = LANE_MASK,
+ LANE_SHIFT = 2,
+ LANE_NUM = 4,
+
+ // BITMASK_PERM encodings
+
+ BITMASK_MASK = 0x1F,
+ BITMASK_MAX = BITMASK_MASK,
+ BITMASK_WIDTH = 5,
+
+ BITMASK_AND_SHIFT = 0,
+ BITMASK_OR_SHIFT = 5,
+ BITMASK_XOR_SHIFT = 10
+};
+
+} // namespace Swizzle
+
namespace SDWA {
enum SdwaSel {
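The SWAP, REVERSE, and BROADCAST macros are all shorthands for the
BITMASK_PERM encoding above: the resulting lane index is computed as
((lane & and) | or) ^ xor (cf. the probes in printSwizzleBitmask). A minimal
standalone sketch of the mapping used by parseSwizzleSwap, parseSwizzleReverse,
and parseSwizzleBroadcast follows; the helper mirrors encodeBitmaskPerm from
AMDGPUAsmParser.cpp in this patch, while main and the printed labels are
illustrative assumptions only.

// Standalone sketch (not part of the patch): how the swizzle macros reduce to
// the BITMASK_PERM encoding, using the EncBits constants above.
#include <cstdint>
#include <cstdio>

constexpr unsigned BITMASK_PERM_ENC = 0x0000;
constexpr unsigned BITMASK_MAX = 0x1F;
constexpr unsigned BITMASK_AND_SHIFT = 0;
constexpr unsigned BITMASK_OR_SHIFT = 5;
constexpr unsigned BITMASK_XOR_SHIFT = 10;

// Mirrors encodeBitmaskPerm in AMDGPUAsmParser.cpp: pack the three 5-bit
// masks into the 15-bit BITMASK_PERM immediate.
uint16_t encodeBitmaskPerm(unsigned AndMask, unsigned OrMask, unsigned XorMask) {
  return BITMASK_PERM_ENC | (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) | (XorMask << BITMASK_XOR_SHIFT);
}

int main() {
  // swizzle(SWAP, N): XOR with the (power-of-two) group size N.
  printf("swap(16)       = 0x%04x\n",
         (unsigned)encodeBitmaskPerm(BITMASK_MAX, 0, 16));
  // swizzle(REVERSE, N): XOR with N-1 reverses each group of N lanes.
  printf("reverse(8)     = 0x%04x\n",
         (unsigned)encodeBitmaskPerm(BITMASK_MAX, 0, 8 - 1));
  // swizzle(BROADCAST, N, lane): AND keeps the group base, OR selects the lane.
  printf("broadcast(4,1) = 0x%04x\n",
         (unsigned)encodeBitmaskPerm(BITMASK_MAX - 4 + 1, 1, 0));
  return 0;
}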
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index b5e3ce3dfe3ed..e22166d03e9ae 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -826,7 +826,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
// NOTE: this could be improved with knowledge of all call sites or
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::RETURN ||
- MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
+ MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
@@ -1149,8 +1150,10 @@ void SIInsertWaitcnts::updateEventWaitCntAfter(
// instruction, update the upper-bound of the appropriate counter's
// bracket and the destination operand scores.
// TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
- if (TII->isDS(Inst) && (Inst.mayLoad() || Inst.mayStore())) {
- if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) {
+ uint64_t TSFlags = Inst.getDesc().TSFlags;
+ if (TII->isDS(Inst) && (TSFlags & SIInstrFlags::LGKM_CNT)) {
+ if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds) &&
+ TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
} else {
@@ -1183,7 +1186,7 @@ void SIInsertWaitcnts::updateEventWaitCntAfter(
Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
if ( // TODO: assumed yes -- target_info->MemWriteNeedsExpWait() &&
- (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()))) {
+ (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
}
} else if (TII->isSMRD(Inst)) {
@@ -1715,6 +1718,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
MLI = &getAnalysis<MachineLoopInfo>();
IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
AMDGPUASI = ST->getAMDGPUAS();
HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
@@ -1859,5 +1863,19 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (!MFI->isEntryFunction()) {
+ // Wait for any outstanding memory operations that the input registers may
+ // depend on. We can't track them and it's better to do the wait after the
+ // costly call sequence.
+
+ // TODO: Could insert earlier and schedule more liberally with operations
+ // that only use caller preserved registers.
+ MachineBasicBlock &EntryBB = MF.front();
+ BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+
+ Modified = true;
+ }
+
return Modified;
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index c5287c7f64ba4..445bf79a7814e 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -383,6 +383,14 @@ def SendMsgMatchClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
+def SwizzleMatchClass : AsmOperandClass {
+ let Name = "Swizzle";
+ let PredicateMethod = "isSwizzle";
+ let ParserMethod = "parseSwizzleOp";
+ let RenderMethod = "addImmOperands";
+ let IsOptional = 1;
+}
+
def ExpTgtMatchClass : AsmOperandClass {
let Name = "ExpTgt";
let PredicateMethod = "isExpTgt";
@@ -395,6 +403,11 @@ def SendMsgImm : Operand<i32> {
let ParserMatchClass = SendMsgMatchClass;
}
+def SwizzleImm : Operand<i16> {
+ let PrintMethod = "printSwizzle";
+ let ParserMatchClass = SwizzleMatchClass;
+}
+
def SWaitMatchClass : AsmOperandClass {
let Name = "SWaitCnt";
let RenderMethod = "addImmOperands";
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index b91cdddc5520f..a648c178101a1 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -66,6 +66,12 @@ public:
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
+ // Stack access is very expensive. CSRs are also the high registers, and we
+ // want to minimize the number of used registers.
+ unsigned getCSRFirstUseCost() const override {
+ return 100;
+ }
+
unsigned getFrameRegister(const MachineFunction &MF) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index b6868de6a74e3..03b11ae80500e 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -65,5 +65,18 @@ const char* const IdSymbolic[] = {
};
} // namespace Hwreg
+
+namespace Swizzle {
+
+// This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h.
+const char* const IdSymbolic[] = {
+ "QUAD_PERM",
+ "BITMASK_PERM",
+ "SWAP",
+ "REVERSE",
+ "BROADCAST",
+};
+
+} // namespace Swizzle
} // namespace AMDGPU
} // namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
index b2dc2c0e364cd..ebb2be22b4879 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
@@ -25,6 +25,12 @@ namespace Hwreg { // Symbolic names for the hwreg(...) syntax.
extern const char* const IdSymbolic[];
} // namespace Hwreg
+
+namespace Swizzle { // Symbolic names for the swizzle(...) syntax.
+
+extern const char* const IdSymbolic[];
+
+} // namespace Swizzle
} // namespace AMDGPU
} // namespace llvm
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 90baabcdb6520..ec49f0d37af44 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -757,14 +757,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MI.eraseFromParent();
}
-static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
- for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
- MBB->addLiveIn(*I);
-}
-
/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
-/// possible. This only gets used at -O0 so we don't care about efficiency of the
-/// generated code.
+/// possible. This only gets used at -O0 so we don't care about efficiency of
+/// the generated code.
bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned LdrexOp, unsigned StrexOp,
@@ -773,16 +768,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
bool IsThumb = STI->isThumb();
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
- MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Dest = MI.getOperand(0);
unsigned StatusReg = MI.getOperand(1).getReg();
- MachineOperand &Addr = MI.getOperand(2);
- MachineOperand &Desired = MI.getOperand(3);
- MachineOperand &New = MI.getOperand(4);
-
- LivePhysRegs LiveRegs(TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ bool StatusDead = MI.getOperand(1).isDead();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; however, undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned DesiredReg = MI.getOperand(3).getReg();
+ unsigned NewReg = MI.getOperand(4).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -795,25 +789,35 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
if (UxtOp) {
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg())
- .addReg(Desired.getReg(), RegState::Kill);
+ BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg)
+ .addReg(DesiredReg, RegState::Kill);
if (!IsThumb)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
}
// .Lloadcmp:
+ // mov wStatus, #0
// ldrex rDest, [rAddr]
// cmp rDest, rDesired
// bne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(Dest.getReg());
- LoadCmpBB->addLiveIn(Desired.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
+ if (!StatusDead) {
+ if (IsThumb) {
+ BuildMI(LoadCmpBB, DL, TII->get(ARM::tMOVi8), StatusReg)
+ .addDef(ARM::CPSR, RegState::Dead)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ } else {
+ BuildMI(LoadCmpBB, DL, TII->get(ARM::MOVi), StatusReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ }
+ }
MachineInstrBuilder MIB;
MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
- MIB.addReg(Addr.getReg());
+ MIB.addReg(AddrReg);
if (LdrexOp == ARM::t2LDREX)
MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
MIB.add(predOps(ARMCC::AL));
@@ -821,7 +825,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .add(Desired)
+ .addReg(DesiredReg)
.add(predOps(ARMCC::AL));
unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
BuildMI(LoadCmpBB, DL, TII->get(Bcc))
@@ -835,21 +839,16 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
// strex rStatus, rNew, [rAddr]
// cmp rStatus, #0
// bne .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(New.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
-
-
- MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg);
- MIB.add(New);
- MIB.add(Addr);
+ MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg)
+ .addReg(NewReg)
+ .addReg(AddrReg);
if (StrexOp == ARM::t2STREX)
MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
MIB.add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, RegState::Kill)
+ .addReg(StatusReg, getKillRegState(StatusDead))
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
@@ -861,12 +860,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute livein lists.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass around the loop to get loop carried registers right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
@@ -894,19 +905,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
DebugLoc DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
unsigned StatusReg = MI.getOperand(1).getReg();
- MachineOperand &Addr = MI.getOperand(2);
- MachineOperand &Desired = MI.getOperand(3);
- MachineOperand &New = MI.getOperand(4);
+ bool StatusDead = MI.getOperand(1).isDead();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; however, undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned DesiredReg = MI.getOperand(3).getReg();
+ MachineOperand New = MI.getOperand(4);
+ New.setIsKill(false);
unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
- unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0);
- unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1);
-
- LivePhysRegs LiveRegs(TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ unsigned DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
+ unsigned DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -922,26 +933,21 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
// cmp rDestLo, rDesiredLo
// sbcs rStatus<dead>, rDestHi, rDesiredHi
// bne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(Dest.getReg());
- LoadCmpBB->addLiveIn(Desired.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
-
unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
MachineInstrBuilder MIB;
MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
- MIB.addReg(Addr.getReg()).add(predOps(ARMCC::AL));
+ MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
.addReg(DestLo, getKillRegState(Dest.isDead()))
- .addReg(DesiredLo, getKillRegState(Desired.isDead()))
+ .addReg(DesiredLo)
.add(predOps(ARMCC::AL));
BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
.addReg(DestHi, getKillRegState(Dest.isDead()))
- .addReg(DesiredHi, getKillRegState(Desired.isDead()))
+ .addReg(DesiredHi)
.addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);
unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
@@ -956,18 +962,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
// strexd rStatus, rNewLo, rNewHi, [rAddr]
// cmp rStatus, #0
// bne .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(New.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
-
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
- MIB.add(Addr).add(predOps(ARMCC::AL));
+ MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, RegState::Kill)
+ .addReg(StatusReg, getKillRegState(StatusDead))
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
@@ -979,12 +981,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute livein lists.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass around the loop to get loop carried registers right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 4f7a0ab4e2203..c2b2502843c0a 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -968,8 +968,9 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
continue;
- bool isLiveIn = MF.getRegInfo().isLiveIn(Reg);
- if (!isLiveIn)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool isLiveIn = MRI.isLiveIn(Reg);
+ if (!isLiveIn && !MRI.isReserved(Reg))
MBB.addLiveIn(Reg);
// If NoGap is true, push consecutive registers and then leave the rest
// for other instructions. e.g.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index bee83dfb6f636..423f97ccacd64 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1413,7 +1413,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
// and make use of the same compressed jump table format as Thumb-2.
-let Size = 2 in {
+let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
+ isIndirectBranch = 1 in {
def tTBB_JT : tPseudoInst<(outs),
(ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
Sched<[WriteBr]>;
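
Marking these Thumb-1 jump-table pseudos as isBranch/isTerminator/isBarrier
(and isIndirectBranch) tells generic codegen that control never falls through
the dispatch; without those flags, passes may schedule or place instructions
after it as if they were reachable. The parallel change to t2BR_JT in
ARMInstrThumb2.td below does the same for Thumb-2.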
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index bf3d820e7b7d0..45471a4e95b39 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3494,7 +3494,8 @@ def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br,
let AsmMatchConverter = "cvtThumbBranches";
}
-let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
+let Size = 4, isNotDuplicable = 1, isBranch = 1, isTerminator = 1,
+ isBarrier = 1, isIndirectBranch = 1 in {
// available in both v8-M.Baseline and Thumb2 targets
def t2BR_JT : t2basePseudoInst<(outs),
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index c4f23c66e4eab..f5e4043882ff7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -382,7 +382,7 @@ namespace {
/// ARM Code Generator Pass Configuration Options.
class ARMPassConfig : public TargetPassConfig {
public:
- ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM)
+ ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
ARMBaseTargetMachine &getARMTargetMachine() const {
@@ -419,7 +419,7 @@ INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix",
"ARM Execution Dependency Fix", false, false)
TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new ARMPassConfig(this, PM);
+ return new ARMPassConfig(*this, PM);
}
void ARMPassConfig::addIRPasses() {
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index e5eb27114c726..2fcee73228fe7 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -60,6 +60,10 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
};
/// ARM/Thumb little endian target machine.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 40bf545e83224..b0d1d3fb9ef0b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -729,6 +729,15 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// linker can handle it. GNU AS produces an error in this case.
if (Sym->isExternal() || Value >= 0x400004)
IsResolved = false;
+ // When an ARM function is called from a Thumb function, produce a
+ // relocation so the linker will use the correct branch instruction for ELF
+ // binaries.
+ if (Sym->isELF()) {
+ unsigned Type = cast<MCSymbolELF>(Sym)->getType();
+ if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC) &&
+ !Asm.isThumbFunc(Sym))
+ IsResolved = false;
+ }
}
// We must always generate a relocation for BL/BLX instructions if we have
// a symbol to reference, as the linker relies on knowing the destination
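
The interworking case being handled: a Thumb-mode BL to a symbol that turns
out to be ARM code cannot be finalized at assembly time, because only the
linker can rewrite the BL to a BLX or route the call through a veneer.
Leaving IsResolved false forces the fixup to be emitted as a relocation
(typically R_ARM_THM_CALL for a Thumb-mode BL) so the linker can make that
decision per destination.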
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index f917c35b9cebb..f10427e2ed575 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -698,13 +698,14 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CopyRegs.insert(ArgReg);
// Push the low registers and lr
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!LoRegsToSave.empty()) {
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
if (LoRegsToSave.count(Reg)) {
- bool isKill = !MF.getRegInfo().isLiveIn(Reg);
- if (isKill)
+ bool isKill = !MRI.isLiveIn(Reg);
+ if (isKill && !MRI.isReserved(Reg))
MBB.addLiveIn(Reg);
MIB.addReg(Reg, getKillRegState(isKill));
@@ -746,8 +747,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
SmallVector<unsigned, 4> RegsToPush;
while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
if (HiRegsToSave.count(*HiRegToSave)) {
- bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave);
- if (isKill)
+ bool isKill = !MRI.isLiveIn(*HiRegToSave);
+ if (isKill && !MRI.isReserved(*HiRegToSave))
MBB.addLiveIn(*HiRegToSave);
// Emit a MOV from the high reg to the low reg.
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index ef9c00e4b784e..7d3faac1dcc20 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -1500,9 +1500,9 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
unsigned DstReg = MI.getOperand(0).getReg();
// BB:
- // cp 0, N
+ // cpi N, 0
// breq RemBB
- BuildMI(BB, dl, TII.get(AVR::CPRdRr)).addReg(ShiftAmtSrcReg).addReg(AVR::R0);
+ BuildMI(BB, dl, TII.get(AVR::CPIRdK)).addReg(ShiftAmtSrcReg).addImm(0);
BuildMI(BB, dl, TII.get(AVR::BREQk)).addMBB(RemBB);
// LoopBB:
diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td
index f10ca394f36ce..5dd8b2c27b212 100644
--- a/lib/Target/AVR/AVRInstrInfo.td
+++ b/lib/Target/AVR/AVRInstrInfo.td
@@ -904,7 +904,7 @@ let Defs = [SREG] in
// Compares a register with an 8 bit immediate.
def CPIRdK : FRdK<0b0011,
(outs),
- (ins GPR8:$rd, imm_ldi8:$k),
+ (ins LD8:$rd, imm_ldi8:$k),
"cpi\t$rd, $k",
[(AVRcmp i8:$rd, imm:$k), (implicit SREG)]>;
}
diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp
index fb3262916b4fd..2ab0b1080c6af 100644
--- a/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/lib/Target/AVR/AVRTargetMachine.cpp
@@ -57,7 +57,7 @@ namespace {
/// AVR Code Generator Pass Configuration Options.
class AVRPassConfig : public TargetPassConfig {
public:
- AVRPassConfig(AVRTargetMachine *TM, PassManagerBase &PM)
+ AVRPassConfig(AVRTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
AVRTargetMachine &getAVRTargetMachine() const {
@@ -71,7 +71,7 @@ public:
} // namespace
TargetPassConfig *AVRTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new AVRPassConfig(this, PM);
+ return new AVRPassConfig(*this, PM);
}
extern "C" void LLVMInitializeAVRTarget() {
diff --git a/lib/Target/AVR/AVRTargetMachine.h b/lib/Target/AVR/AVRTargetMachine.h
index 10345193d14af..795e94e6af03a 100644
--- a/lib/Target/AVR/AVRTargetMachine.h
+++ b/lib/Target/AVR/AVRTargetMachine.h
@@ -41,6 +41,10 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
+
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AVRSubtarget SubTarget;
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 897695633e46b..cf8e735409042 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -58,7 +58,7 @@ namespace {
// BPF Code Generator Pass Configuration Options.
class BPFPassConfig : public TargetPassConfig {
public:
- BPFPassConfig(BPFTargetMachine *TM, PassManagerBase &PM)
+ BPFPassConfig(BPFTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
BPFTargetMachine &getBPFTargetMachine() const {
@@ -70,7 +70,7 @@ public:
}
TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new BPFPassConfig(this, PM);
+ return new BPFPassConfig(*this, PM);
}
// Install an instruction selector pass using
diff --git a/lib/Target/BPF/CMakeLists.txt b/lib/Target/BPF/CMakeLists.txt
index e2654b0465df1..4918653ff19da 100644
--- a/lib/Target/BPF/CMakeLists.txt
+++ b/lib/Target/BPF/CMakeLists.txt
@@ -4,7 +4,7 @@ tablegen(LLVM BPFGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM BPFGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM BPFGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM BPFGenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM X86GenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM BPFGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM BPFGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM BPFGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM BPFGenCallingConv.inc -gen-callingconv)
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 8e10c521a77d3..e4434136bf866 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -71,6 +71,9 @@ public:
return true;
}
+ bool ComplexPatternFuncMutatesDAG() const override {
+ return true;
+ }
void PreprocessISelDAG() override;
void EmitFunctionEntryCode() override;
@@ -81,6 +84,7 @@ public:
inline bool SelectAddrGP(SDValue &N, SDValue &R);
bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP);
bool SelectAddrFI(SDValue &N, SDValue &R);
+ bool DetectUseSxtw(SDValue &N, SDValue &R);
StringRef getPassName() const override {
return "Hexagon DAG->DAG Pattern Instruction Selection";
@@ -106,7 +110,6 @@ public:
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl);
void SelectStore(SDNode *N);
void SelectSHL(SDNode *N);
- void SelectMul(SDNode *N);
void SelectZeroExtend(SDNode *N);
void SelectIntrinsicWChain(SDNode *N);
void SelectIntrinsicWOChain(SDNode *N);
@@ -118,7 +121,7 @@ public:
#include "HexagonGenDAGISel.inc"
private:
- bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
+ bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src);
bool isOrEquivalentToAdd(const SDNode *N) const;
bool isAlignedMemNode(const MemSDNode *N) const;
bool isPositiveHalfWord(const SDNode *N) const;
@@ -597,90 +600,6 @@ void HexagonDAGToDAGISel::SelectStore(SDNode *N) {
SelectCode(ST);
}
-void HexagonDAGToDAGISel::SelectMul(SDNode *N) {
- SDLoc dl(N);
-
- // %conv.i = sext i32 %tmp1 to i64
- // %conv2.i = sext i32 %add to i64
- // %mul.i = mul nsw i64 %conv2.i, %conv.i
- //
- // --- match with the following ---
- //
- // %mul.i = mpy (%tmp1, %add)
- //
-
- if (N->getValueType(0) == MVT::i64) {
- // Shifting a i64 signed multiply.
- SDValue MulOp0 = N->getOperand(0);
- SDValue MulOp1 = N->getOperand(1);
-
- SDValue OP0;
- SDValue OP1;
-
- // Handle sign_extend and sextload.
- if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext0 = MulOp0.getOperand(0);
- if (Sext0.getNode()->getValueType(0) != MVT::i32) {
- SelectCode(N);
- return;
- }
- OP0 = Sext0;
- } else if (MulOp0.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- SelectCode(N);
- return;
- }
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
- OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(), TargetConst0,
- Chain), 0);
- } else {
- SelectCode(N);
- return;
- }
-
- // Same goes for the second operand.
- if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext1 = MulOp1.getOperand(0);
- if (Sext1.getNode()->getValueType(0) != MVT::i32) {
- SelectCode(N);
- return;
- }
- OP1 = Sext1;
- } else if (MulOp1.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- SelectCode(N);
- return;
- }
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
- OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(), TargetConst0,
- Chain), 0);
- } else {
- SelectCode(N);
- return;
- }
-
- // Generate a mpy instruction.
- SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl,
- MVT::i64, OP0, OP1);
- ReplaceNode(N, Result);
- return;
- }
-
- SelectCode(N);
-}
-
void HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
SDLoc dl(N);
SDValue Shl_0 = N->getOperand(0);
@@ -843,7 +762,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
SDValue V = N->getOperand(1);
SDValue U;
- if (isValueExtension(V, Bits, U)) {
+ if (keepsLowBits(V, Bits, U)) {
SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
N->getOperand(0), U);
ReplaceNode(N, R.getNode());
@@ -949,7 +868,6 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {
case ISD::SHL: return SelectSHL(N);
case ISD::LOAD: return SelectLoad(N);
case ISD::STORE: return SelectStore(N);
- case ISD::MUL: return SelectMul(N);
case ISD::ZERO_EXTEND: return SelectZeroExtend(N);
case ISD::INTRINSIC_W_CHAIN: return SelectIntrinsicWChain(N);
case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N);
@@ -1327,7 +1245,7 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
}
// Match a frame index that can be used in an addressing mode.
-bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) {
+bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) {
if (N.getOpcode() != ISD::FrameIndex)
return false;
auto &HFI = *HST->getFrameLowering();
@@ -1388,16 +1306,83 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
return false;
}
-bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
- unsigned FromBits, SDValue &Src) {
+bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) {
+ // This (complex pattern) function is meant to detect a sign-extension
+ // i32->i64 on a per-operand basis. This would allow writing single
+ // patterns that would cover a number of combinations of different ways
+ // a sign-extension could be written. For example:
+ // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
+ // could match either one of these:
+ // (mul (sext x) (sext_inreg y))
+ // (mul (sext-load *p) (sext_inreg y))
+ // (mul (sext_inreg x) (sext y))
+ // etc.
+ //
+ // The returned value will have type i64 and its low word will
+ // contain the value being extended. The high bits are not specified.
+ // The returned type is i64 because the original type of N was i64,
+ // but the users of this function should only use the low word of the
+ // result, e.g.
+ // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
+
+ if (N.getValueType() != MVT::i64)
+ return false;
+ EVT SrcVT;
+ unsigned Opc = N.getOpcode();
+ switch (Opc) {
+ case ISD::SIGN_EXTEND:
+ case ISD::SIGN_EXTEND_INREG: {
+ // sext_inreg has the source type as a separate operand.
+ EVT T = Opc == ISD::SIGN_EXTEND
+ ? N.getOperand(0).getValueType()
+ : cast<VTSDNode>(N.getOperand(1))->getVT();
+ if (T.getSizeInBits() != 32)
+ return false;
+ R = N.getOperand(0);
+ break;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ if (L->getExtensionType() != ISD::SEXTLOAD)
+ return false;
+ // All extending loads extend to i32, so even if the value in
+ // memory is shorter than 32 bits, it will be i32 after the load.
+ if (L->getMemoryVT().getSizeInBits() > 32)
+ return false;
+ R = N;
+ break;
+ }
+ default:
+ return false;
+ }
+ EVT RT = R.getValueType();
+ if (RT == MVT::i64)
+ return true;
+ assert(RT == MVT::i32);
+ // This is only to produce a value of type i64. Do not rely on the
+ // high bits produced by this.
+ const SDLoc &dl(N);
+ SDValue Ops[] = {
+ CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
+ R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
+ R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
+ };
+ SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
+ MVT::i64, Ops);
+ R = SDValue(T, 0);
+ return true;
+}
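
To make the motivation concrete, here is hypothetical C++ user code (not
part of the patch) whose DAGs exhibit the different extension shapes listed
in the comment; with this change, a single Usxtw-based pattern can cover all
of them:

    // Hypothetical sources of the i32->i64 sign-extension shapes that
    // DetectUseSxtw recognizes as operands of a 64-bit multiply.
    long long viaSext(int a, int b) {
      return (long long)a * (long long)b;      // (mul (sext a) (sext b))
    }
    long long viaSextLoad(const int *p, int b) {
      return (long long)*p * (long long)b;     // (mul (sextload p) (sext b))
    }
    long long viaSextInReg(long long a, int b) {
      return (long long)(int)a * (long long)b; // (mul (sext_inreg a) (sext b))
    }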
+
+bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
+ SDValue &Src) {
unsigned Opc = Val.getOpcode();
switch (Opc) {
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: {
- SDValue const &Op0 = Val.getOperand(0);
+ const SDValue &Op0 = Val.getOperand(0);
EVT T = Op0.getValueType();
- if (T.isInteger() && T.getSizeInBits() == FromBits) {
+ if (T.isInteger() && T.getSizeInBits() == NumBits) {
Src = Op0;
return true;
}
@@ -1408,23 +1393,23 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
case ISD::AssertZext:
if (Val.getOperand(0).getValueType().isInteger()) {
VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
- if (T->getVT().getSizeInBits() == FromBits) {
+ if (T->getVT().getSizeInBits() == NumBits) {
Src = Val.getOperand(0);
return true;
}
}
break;
case ISD::AND: {
- // Check if this is an AND with "FromBits" of lower bits set to 1.
- uint64_t FromMask = (1 << FromBits) - 1;
+ // Check if this is an AND with the lower NumBits bits set to 1.
+ uint64_t Mask = (1 << NumBits) - 1;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
- if (C->getZExtValue() == FromMask) {
+ if (C->getZExtValue() == Mask) {
Src = Val.getOperand(1);
return true;
}
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
- if (C->getZExtValue() == FromMask) {
+ if (C->getZExtValue() == Mask) {
Src = Val.getOperand(0);
return true;
}
@@ -1433,16 +1418,16 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
}
case ISD::OR:
case ISD::XOR: {
- // OR/XOR with the lower "FromBits" bits set to 0.
- uint64_t FromMask = (1 << FromBits) - 1;
+ // OR/XOR with the lower NumBits bits set to 0.
+ uint64_t Mask = (1 << NumBits) - 1;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
- if ((C->getZExtValue() & FromMask) == 0) {
+ if ((C->getZExtValue() & Mask) == 0) {
Src = Val.getOperand(1);
return true;
}
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
- if ((C->getZExtValue() & FromMask) == 0) {
+ if ((C->getZExtValue() & Mask) == 0) {
Src = Val.getOperand(0);
return true;
}
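
The AND/OR/XOR cases of keepsLowBits reduce to simple mask arithmetic. A
standalone sketch of the two predicates (hypothetical helper names; NumBits
is assumed to be less than 64):

    #include <cstdint>

    // AND keeps the low NumBits iff the constant has exactly those bits set:
    // the low bits pass through and everything above is cleared.
    static bool andKeepsLowBits(uint64_t C, unsigned NumBits) {
      uint64_t Mask = (uint64_t(1) << NumBits) - 1;
      return C == Mask;
    }

    // OR/XOR keep the low NumBits iff the constant leaves them untouched,
    // i.e. none of the low bits are set in the constant.
    static bool orXorKeepsLowBits(uint64_t C, unsigned NumBits) {
      uint64_t Mask = (uint64_t(1) << NumBits) - 1;
      return (C & Mask) == 0;
    }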
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 5ecf9320d5c26..4c6c6eeafbe0e 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1928,11 +1928,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BSWAP, MVT::i64, Legal);
-
- // We custom lower i64 to i64 mul, so that it is not considered as a legal
- // operation. There is a pattern that will match i64 mul and transform it
- // to a series of instructions.
- setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
for (unsigned IntExpOp :
{ ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 66e07c67958e9..0fef91ec4d3e0 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1769,161 +1769,6 @@ bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const {
return getType(MI) == HexagonII::TypeCJ && MI.isBranch();
}
-bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const {
- return (MI.isBranch() && isPredicated(MI)) ||
- isConditionalTransfer(MI) ||
- isConditionalALU32(MI) ||
- isConditionalLoad(MI) ||
- // Predicated stores which don't have a .new on any operands.
- (MI.mayStore() && isPredicated(MI) && !isNewValueStore(MI) &&
- !isPredicatedNew(MI));
-}
-
-bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const {
- switch (MI.getOpcode()) {
- case Hexagon::A2_paddf:
- case Hexagon::A2_paddfnew:
- case Hexagon::A2_paddif:
- case Hexagon::A2_paddifnew:
- case Hexagon::A2_paddit:
- case Hexagon::A2_padditnew:
- case Hexagon::A2_paddt:
- case Hexagon::A2_paddtnew:
- case Hexagon::A2_pandf:
- case Hexagon::A2_pandfnew:
- case Hexagon::A2_pandt:
- case Hexagon::A2_pandtnew:
- case Hexagon::A2_porf:
- case Hexagon::A2_porfnew:
- case Hexagon::A2_port:
- case Hexagon::A2_portnew:
- case Hexagon::A2_psubf:
- case Hexagon::A2_psubfnew:
- case Hexagon::A2_psubt:
- case Hexagon::A2_psubtnew:
- case Hexagon::A2_pxorf:
- case Hexagon::A2_pxorfnew:
- case Hexagon::A2_pxort:
- case Hexagon::A2_pxortnew:
- case Hexagon::A4_paslhf:
- case Hexagon::A4_paslhfnew:
- case Hexagon::A4_paslht:
- case Hexagon::A4_paslhtnew:
- case Hexagon::A4_pasrhf:
- case Hexagon::A4_pasrhfnew:
- case Hexagon::A4_pasrht:
- case Hexagon::A4_pasrhtnew:
- case Hexagon::A4_psxtbf:
- case Hexagon::A4_psxtbfnew:
- case Hexagon::A4_psxtbt:
- case Hexagon::A4_psxtbtnew:
- case Hexagon::A4_psxthf:
- case Hexagon::A4_psxthfnew:
- case Hexagon::A4_psxtht:
- case Hexagon::A4_psxthtnew:
- case Hexagon::A4_pzxtbf:
- case Hexagon::A4_pzxtbfnew:
- case Hexagon::A4_pzxtbt:
- case Hexagon::A4_pzxtbtnew:
- case Hexagon::A4_pzxthf:
- case Hexagon::A4_pzxthfnew:
- case Hexagon::A4_pzxtht:
- case Hexagon::A4_pzxthtnew:
- case Hexagon::C2_ccombinewf:
- case Hexagon::C2_ccombinewt:
- return true;
- }
- return false;
-}
-
-// FIXME - Function name and it's functionality don't match.
-// It should be renamed to hasPredNewOpcode()
-bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const {
- if (!MI.getDesc().mayLoad() || !isPredicated(MI))
- return false;
-
- int PNewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode());
- // Instruction with valid predicated-new opcode can be promoted to .new.
- return PNewOpcode >= 0;
-}
-
-// Returns true if an instruction is a conditional store.
-//
-// Note: It doesn't include conditional new-value stores as they can't be
-// converted to .new predicate.
-bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const {
- switch (MI.getOpcode()) {
- default: return false;
- case Hexagon::S4_storeirbt_io:
- case Hexagon::S4_storeirbf_io:
- case Hexagon::S4_pstorerbt_rr:
- case Hexagon::S4_pstorerbf_rr:
- case Hexagon::S2_pstorerbt_io:
- case Hexagon::S2_pstorerbf_io:
- case Hexagon::S2_pstorerbt_pi:
- case Hexagon::S2_pstorerbf_pi:
- case Hexagon::S2_pstorerdt_io:
- case Hexagon::S2_pstorerdf_io:
- case Hexagon::S4_pstorerdt_rr:
- case Hexagon::S4_pstorerdf_rr:
- case Hexagon::S2_pstorerdt_pi:
- case Hexagon::S2_pstorerdf_pi:
- case Hexagon::S2_pstorerht_io:
- case Hexagon::S2_pstorerhf_io:
- case Hexagon::S4_storeirht_io:
- case Hexagon::S4_storeirhf_io:
- case Hexagon::S4_pstorerht_rr:
- case Hexagon::S4_pstorerhf_rr:
- case Hexagon::S2_pstorerht_pi:
- case Hexagon::S2_pstorerhf_pi:
- case Hexagon::S2_pstorerit_io:
- case Hexagon::S2_pstorerif_io:
- case Hexagon::S4_storeirit_io:
- case Hexagon::S4_storeirif_io:
- case Hexagon::S4_pstorerit_rr:
- case Hexagon::S4_pstorerif_rr:
- case Hexagon::S2_pstorerit_pi:
- case Hexagon::S2_pstorerif_pi:
-
- // V4 global address store before promoting to dot new.
- case Hexagon::S4_pstorerdt_abs:
- case Hexagon::S4_pstorerdf_abs:
- case Hexagon::S4_pstorerbt_abs:
- case Hexagon::S4_pstorerbf_abs:
- case Hexagon::S4_pstorerht_abs:
- case Hexagon::S4_pstorerhf_abs:
- case Hexagon::S4_pstorerit_abs:
- case Hexagon::S4_pstorerif_abs:
- return true;
-
- // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
- // from the "Conditional Store" list. Because a predicated new value store
- // would NOT be promoted to a double dot new store.
- // This function returns yes for those stores that are predicated but not
- // yet promoted to predicate dot new instructions.
- }
-}
-
-bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const {
- switch (MI.getOpcode()) {
- case Hexagon::A2_tfrt:
- case Hexagon::A2_tfrf:
- case Hexagon::C2_cmoveit:
- case Hexagon::C2_cmoveif:
- case Hexagon::A2_tfrtnew:
- case Hexagon::A2_tfrfnew:
- case Hexagon::C2_cmovenewit:
- case Hexagon::C2_cmovenewif:
- case Hexagon::A2_tfrpt:
- case Hexagon::A2_tfrpf:
- return true;
-
- default:
- return false;
- }
- return false;
-}
-
// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle
// isFPImm and later getFPImm as well.
bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const {
@@ -3474,6 +3319,8 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
// Returns the opcode to use when converting MI, which is a conditional jump,
// into a conditional instruction which uses the .new value of the predicate.
// We also use branch probabilities to add a hint to the jump.
+// If MBPI is null, all edges will be treated as equally likely for the
+// purposes of establishing a predication hint.
int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const {
// We assume that block can have at most two successors.
@@ -3482,9 +3329,16 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
bool Taken = false;
const BranchProbability OneHalf(1, 2);
+ auto getEdgeProbability = [MBPI] (const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) {
+ if (MBPI)
+ return MBPI->getEdgeProbability(Src, Dst);
+ return BranchProbability(1, Src->succ_size());
+ };
+
if (BrTarget.isMBB()) {
const MachineBasicBlock *Dst = BrTarget.getMBB();
- Taken = MBPI->getEdgeProbability(Src, Dst) >= OneHalf;
+ Taken = getEdgeProbability(Src, Dst) >= OneHalf;
} else {
// The branch target is not a basic block (most likely a function).
// Since BPI only gives probabilities for targets that are basic blocks,
@@ -3521,7 +3375,7 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
for (const MachineBasicBlock *SB : B.successors()) {
if (!B.isLayoutSuccessor(SB))
continue;
- Taken = MBPI->getEdgeProbability(Src, SB) < OneHalf;
+ Taken = getEdgeProbability(Src, SB) < OneHalf;
break;
}
} else {
@@ -3534,7 +3388,7 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
BT = Op.getMBB();
break;
}
- Taken = BT && MBPI->getEdgeProbability(Src, BT) < OneHalf;
+ Taken = BT && getEdgeProbability(Src, BT) < OneHalf;
}
} // if (!Bad)
}
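
Note the connection to the packetizer change later in this diff:
HexagonVLIWPacketizer.cpp now calls HII->getDotNewPredOp(MI, nullptr), which
for predicated jumps plausibly reaches this function with MBPI == nullptr.
The lambda's BranchProbability(1, Src->succ_size()) fallback weights all
successors equally, so the .new hint degrades gracefully instead of
dereferencing a null pointer.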
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 97b9bc9546885..944d0161a7c8e 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -314,11 +314,6 @@ public:
bool isAccumulator(const MachineInstr &MI) const;
bool isComplex(const MachineInstr &MI) const;
bool isCompoundBranchInstr(const MachineInstr &MI) const;
- bool isCondInst(const MachineInstr &MI) const;
- bool isConditionalALU32 (const MachineInstr &MI) const;
- bool isConditionalLoad(const MachineInstr &MI) const;
- bool isConditionalStore(const MachineInstr &MI) const;
- bool isConditionalTransfer(const MachineInstr &MI) const;
bool isConstExtended(const MachineInstr &MI) const;
bool isDeallocRet(const MachineInstr &MI) const;
bool isDependent(const MachineInstr &ProdMI,
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index e6ea67d55b43c..9aa185fc85a6a 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -59,6 +59,9 @@ cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
cl::Hidden, cl::init(false),
cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
+static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
+ cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
+
static const char *HexagonVolatileMemcpyName
= "hexagon_memcpy_forward_vp4cp4n2";
@@ -477,7 +480,7 @@ Value *Simplifier::simplify(Context &C) {
WorkListType Q;
Q.push_back(C.Root);
unsigned Count = 0;
- const unsigned Limit = 100000;
+ const unsigned Limit = SimplifyLimit;
while (!Q.empty()) {
if (Count++ >= Limit)
@@ -501,8 +504,7 @@ Value *Simplifier::simplify(Context &C) {
Q.push_back(Op);
}
}
- assert(Count < Limit && "Infinite loop in HLIR/simplify?");
- return C.Root;
+ return Count < Limit ? C.Root : nullptr;
}
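
With the assert gone, simplify() reports hitting the step limit by returning
nullptr instead of aborting, so callers must now treat a null result as "no
simplification performed". A hedged sketch of the expected call-site shape
(hypothetical names, against the file-local Simplifier class):

    // Hypothetical caller: fall back to the original value when the
    // simplifier gives up after SimplifyLimit steps.
    static Value *simplifyOrKeep(Simplifier &S, Simplifier::Context &C,
                                 Value *Orig) {
      if (Value *V = S.simplify(C))
        return V;
      return Orig; // limit reached; keep the unsimplified tree
    }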
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index 81b5e10c11731..70ed123bc8981 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -382,48 +382,42 @@ def: T_MType_acc_pat3 <M4_or_andn, and, or>;
def: T_MType_acc_pat3 <M4_and_andn, and, and>;
def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
+// This complex pattern is really only to detect various forms of
+// sign-extension i32->i64. The selected value will be of type i64
+// whose low word is the value being extended. The high word is
+// unspecified.
+def Usxtw : ComplexPattern<i64, 1, "DetectUseSxtw", [], []>;
+
def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
-def Sext64: PatFrag<(ops node:$Rs), (i64 (sext node:$Rs))>;
def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
+def Sext64: PatLeaf<(i64 Usxtw:$Rs)>;
-// Return true if for a 32 to 64-bit sign-extended load.
-def Sext64Ld : PatLeaf<(i64 DoubleRegs:$src1), [{
- LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
- if (!LD)
- return false;
- return LD->getExtensionType() == ISD::SEXTLOAD &&
- LD->getMemoryVT().getScalarType() == MVT::i32;
-}]>;
-
-def: Pat<(mul (Aext64 I32:$src1), (Aext64 I32:$src2)),
- (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
-
-def: Pat<(mul (Sext64 I32:$src1), (Sext64 I32:$src2)),
- (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
+def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)),
+ (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>;
-def: Pat<(mul Sext64Ld:$src1, Sext64Ld:$src2),
- (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
+def: Pat<(mul Sext64:$Rs, Sext64:$Rt),
+ (M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
// Multiply and accumulate, use full result.
// Rxx[+-]=mpy(Rs,Rt)
-def: Pat<(add I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))),
- (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+def: Pat<(add I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)),
+ (M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
-def: Pat<(sub I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))),
- (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+def: Pat<(sub I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)),
+ (M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
-def: Pat<(add I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))),
- (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+def: Pat<(add I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))),
+ (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
-def: Pat<(add I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))),
- (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+def: Pat<(add I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))),
+ (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
-def: Pat<(sub I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))),
- (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+def: Pat<(sub I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))),
+ (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
-def: Pat<(sub I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))),
- (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+def: Pat<(sub I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))),
+ (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
InstHexagon MI>
@@ -545,7 +539,8 @@ def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
-def: Pat <(Sext64 I32:$src), (A2_sxtw I32:$src)>;
+def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>;
+def: Pat <(i64 (sext_inreg I64:$src, i32)), (A2_sxtw (LoReg I64:$src))>;
def: Pat<(select (i1 (setlt I32:$src, 0)), (sub 0, I32:$src), I32:$src),
(A2_abs IntRegs:$src)>;
@@ -1159,8 +1154,8 @@ multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
defm: T_MinMax_pats<Op, I64, Inst, SwapInst>;
}
-def: Pat<(add (Sext64 I32:$Rs), I64:$Rt),
- (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>;
+def: Pat<(add Sext64:$Rs, I64:$Rt),
+ (A2_addsp (LoReg Sext64:$Rs), DoubleRegs:$Rt)>;
let AddedComplexity = 200 in {
defm: MinMax_pats_p<setge, A2_maxp, A2_minp>;
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 8e93df6201aea..14ecf297d351c 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -223,7 +223,7 @@ namespace {
/// Hexagon Code Generator Pass Configuration Options.
class HexagonPassConfig : public TargetPassConfig {
public:
- HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
+ HexagonPassConfig(HexagonTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
HexagonTargetMachine &getHexagonTargetMachine() const {
@@ -245,7 +245,7 @@ public:
} // namespace
TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new HexagonPassConfig(this, PM);
+ return new HexagonPassConfig(*this, PM);
}
void HexagonPassConfig::addIRPasses() {
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index cd474921d4bc8..fa08afe4019d7 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -273,25 +273,17 @@ bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI,
if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
return true;
- // Check if this is a predicate dependence.
- const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg);
- if (RC == &Hexagon::PredRegsRegClass)
- return true;
-
- // Assumes that the first operand of the CALLr is the function address.
- if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
- const MachineOperand MO = MI.getOperand(0);
- if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
- return true;
+ // Call-like instructions can be packetized with preceding instructions
+ // that define registers implicitly used or modified by the call. Explicit
+ // uses are still prohibited, as in the case of indirect calls:
+ // r0 = ...
+ // J2_jumpr r0
+ if (DepType == SDep::Data) {
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isReg() && MO.getReg() == DepReg && !MO.isImplicit())
+ return true;
}
- if (HII->isJumpR(MI)) {
- const MachineOperand &MO = HII->isPredicated(MI) ? MI.getOperand(1)
- : MI.getOperand(0);
- assert(MO.isReg() && MO.isUse());
- if (MO.getReg() == DepReg)
- return true;
- }
return false;
}
@@ -333,11 +325,13 @@ bool HexagonPacketizerList::isNewifiable(const MachineInstr &MI,
const TargetRegisterClass *NewRC) {
// Vector stores can be predicated, and can be new-value stores, but
// they cannot be predicated on a .new predicate value.
- if (NewRC == &Hexagon::PredRegsRegClass)
+ if (NewRC == &Hexagon::PredRegsRegClass) {
if (HII->isHVXVec(MI) && MI.mayStore())
return false;
- return HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn() ||
- HII->mayBeNewStore(MI);
+ return HII->isPredicated(MI) && HII->getDotNewPredOp(MI, nullptr) > 0;
+ }
+ // If the class is not PredRegs, it could only apply to new-value stores.
+ return HII->mayBeNewStore(MI);
}
// Promote an instruction to its .cur form.
@@ -760,11 +754,14 @@ bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI,
return false;
}
-static bool isImplicitDependency(const MachineInstr &I, unsigned DepReg) {
+static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
+ unsigned DepReg) {
for (auto &MO : I.operands()) {
- if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
+ if (CheckDef && MO.isRegMask() && MO.clobbersPhysReg(DepReg))
return true;
- if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
+ if (!MO.isReg() || MO.getReg() != DepReg || !MO.isImplicit())
+ continue;
+ if (CheckDef == MO.isDef())
return true;
}
return false;
@@ -798,7 +795,8 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
// If the dependency is through an implicitly defined register, we should not
// newify the use.
- if (isImplicitDependency(PI, DepReg))
+ if (isImplicitDependency(PI, true, DepReg) ||
+ isImplicitDependency(MI, false, DepReg))
return false;
const MCInstrDesc& MCID = PI.getDesc();
@@ -808,8 +806,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
// predicate .new
if (RC == &Hexagon::PredRegsRegClass)
- if (HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn())
- return HII->predCanBeUsedAsDotNew(PI, DepReg);
+ return HII->predCanBeUsedAsDotNew(PI, DepReg);
if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
return false;
diff --git a/lib/Target/Lanai/LanaiTargetMachine.cpp b/lib/Target/Lanai/LanaiTargetMachine.cpp
index 2a9bc25d7fadb..a2f005ce445a8 100644
--- a/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -76,7 +76,7 @@ namespace {
// Lanai Code Generator Pass Configuration Options.
class LanaiPassConfig : public TargetPassConfig {
public:
- LanaiPassConfig(LanaiTargetMachine *TM, PassManagerBase *PassManager)
+ LanaiPassConfig(LanaiTargetMachine &TM, PassManagerBase *PassManager)
: TargetPassConfig(TM, *PassManager) {}
LanaiTargetMachine &getLanaiTargetMachine() const {
@@ -91,7 +91,7 @@ public:
TargetPassConfig *
LanaiTargetMachine::createPassConfig(PassManagerBase &PassManager) {
- return new LanaiPassConfig(this, &PassManager);
+ return new LanaiPassConfig(*this, &PassManager);
}
// Install an instruction selector pass.
diff --git a/lib/Target/Lanai/LanaiTargetMachine.h b/lib/Target/Lanai/LanaiTargetMachine.h
index 5278c70d909da..083ba6fdf8416 100644
--- a/lib/Target/Lanai/LanaiTargetMachine.h
+++ b/lib/Target/Lanai/LanaiTargetMachine.h
@@ -49,6 +49,10 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
};
} // namespace llvm
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index bebe5fa35ad42..d8fdc8ba674e6 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -52,7 +52,7 @@ namespace {
/// MSP430 Code Generator Pass Configuration Options.
class MSP430PassConfig : public TargetPassConfig {
public:
- MSP430PassConfig(MSP430TargetMachine *TM, PassManagerBase &PM)
+ MSP430PassConfig(MSP430TargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
MSP430TargetMachine &getMSP430TargetMachine() const {
@@ -65,7 +65,7 @@ public:
} // namespace
TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new MSP430PassConfig(this, PM);
+ return new MSP430PassConfig(*this, PM);
}
bool MSP430PassConfig::addInstSelector() {
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index e7ceca9612a92..a222080f6b814 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -1,4 +1,4 @@
-//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===//
+//===- Mips16FrameLowering.cpp - Mips16 Frame Information -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,20 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#include "Mips16FrameLowering.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips16FrameLowering.h"
#include "Mips16InstrInfo.h"
#include "MipsInstrInfo.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include <cassert>
+#include <cstdint>
+#include <vector>
using namespace llvm;
@@ -63,7 +72,7 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (CSI.size()) {
+ if (!CSI.empty()) {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
@@ -80,7 +89,6 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
if (hasFP(MF))
BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0)
.addReg(Mips::SP).setMIFlag(MachineInstr::FrameSetup);
-
}
void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 092de216e9b89..a9d6ab0558927 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -201,7 +201,7 @@ namespace {
/// Mips Code Generator Pass Configuration Options.
class MipsPassConfig : public TargetPassConfig {
public:
- MipsPassConfig(MipsTargetMachine *TM, PassManagerBase &PM)
+ MipsPassConfig(MipsTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
// The current implementation of long branch pass requires a scratch
// register ($at) to be available before branch instructions. Tail merging
@@ -227,7 +227,7 @@ public:
} // end anonymous namespace
TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new MipsPassConfig(this, PM);
+ return new MipsPassConfig(*this, PM);
}
void MipsPassConfig::addIRPasses() {
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 140d7133f879b..a3462868cb111 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -66,6 +66,10 @@ public:
bool isLittleEndian() const { return isLittle; }
const MipsABIInfo &getABI() const { return ABI; }
+
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
};
/// Mips32/64 big endian target machine.
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index ab5298d0dcfd6..8dfbfece9b8eb 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -132,7 +132,7 @@ namespace {
class NVPTXPassConfig : public TargetPassConfig {
public:
- NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
+ NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
NVPTXTargetMachine &getNVPTXTargetMachine() const {
@@ -163,7 +163,7 @@ private:
} // end anonymous namespace
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new NVPTXPassConfig(this, PM);
+ return new NVPTXPassConfig(*this, PM);
}
void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 1ed8e3b1e9357..2f3981be22f83 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -65,6 +65,9 @@ public:
TargetIRAnalysis getTargetIRAnalysis() override;
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
}; // NVPTXTargetMachine.
class NVPTXTargetMachine32 : public NVPTXTargetMachine {
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 5fa7b2c6bfb1b..54414457388d3 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -77,6 +77,11 @@ STATISTIC(SignExtensionsAdded,
"Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
"Number of zero extensions for compare inputs added.");
+STATISTIC(NumLogicOpsOnComparison,
+ "Number of logical ops on i1 values calculated in GPR.");
+STATISTIC(OmittedForNonExtendUses,
+ "Number of compares not eliminated as they have non-extending uses.");
+
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
@@ -275,6 +280,8 @@ private:
bool trySETCC(SDNode *N);
bool tryEXTEND(SDNode *N);
+ bool tryLogicOpOfCompares(SDNode *N);
+ SDValue computeLogicOpInGPR(SDValue LogicOp);
SDValue signExtendInputIfNeeded(SDValue Input);
SDValue zeroExtendInputIfNeeded(SDValue Input);
SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
@@ -282,6 +289,10 @@ private:
int64_t RHSValue, SDLoc dl);
SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
int64_t RHSValue, SDLoc dl);
+ SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl);
+ SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl);
SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
void PeepholePPC64();
@@ -2501,6 +2512,11 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
return true;
}
+// Is this opcode a bitwise logical operation?
+static bool isLogicOp(unsigned Opc) {
+ return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
+}
+
/// If this node is a sign/zero extension of an integer comparison,
/// it can usually be computed in GPRs rather than using comparison
/// instructions and ISEL. We only do this on 64-bit targets for now
@@ -2513,13 +2529,20 @@ bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) {
N->getOpcode() == ISD::SIGN_EXTEND) &&
"Expecting a zero/sign extend node!");
- if (N->getOperand(0).getOpcode() != ISD::SETCC)
+ SDValue WideRes;
+ // If we are zero-extending the result of a logical operation on i1
+ // values, we can keep the values in GPRs.
+ if (isLogicOp(N->getOperand(0).getOpcode()) &&
+ N->getOperand(0).getValueType() == MVT::i1 &&
+ N->getOpcode() == ISD::ZERO_EXTEND)
+ WideRes = computeLogicOpInGPR(N->getOperand(0));
+ else if (N->getOperand(0).getOpcode() != ISD::SETCC)
return false;
-
- SDValue WideRes =
- getSETCCInGPR(N->getOperand(0),
- N->getOpcode() == ISD::SIGN_EXTEND ?
- SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
+ else
+ WideRes =
+ getSETCCInGPR(N->getOperand(0),
+ N->getOpcode() == ISD::SIGN_EXTEND ?
+ SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
if (!WideRes)
return false;
@@ -2540,6 +2563,159 @@ bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) {
return true;
}
+// Lower a logical operation on i1 values into a GPR sequence if possible.
+// The result can be kept in a GPR if requested.
+// Three types of inputs can be handled:
+// - SETCC
+// - TRUNCATE
+// - Logical operation (AND/OR/XOR)
+// There is also a special case that is handled (namely a complement operation
+// achieved with xor %a, -1).
+SDValue PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp) {
+ assert(isLogicOp(LogicOp.getOpcode()) &&
+ "Can only handle logic operations here.");
+ assert(LogicOp.getValueType() == MVT::i1 &&
+ "Can only handle logic operations on i1 values here.");
+ SDLoc dl(LogicOp);
+ SDValue LHS, RHS;
+
+ // Special case: xor %a, -1
+ bool IsBitwiseNegation = isBitwiseNot(LogicOp);
+
+ // Produces a GPR sequence for each operand of the binary logic operation.
+ // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
+ // the value in a GPR and for logic operations, it will recursively produce
+ // a GPR sequence for the operation.
+ auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
+ unsigned OperandOpcode = Operand.getOpcode();
+ if (OperandOpcode == ISD::SETCC)
+ return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
+ else if (OperandOpcode == ISD::TRUNCATE) {
+ SDValue InputOp = Operand.getOperand(0);
+ EVT InVT = InputOp.getValueType();
+ return
+ SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
+ PPC::RLDICL, dl, InVT, InputOp,
+ getI64Imm(0, dl), getI64Imm(63, dl)), 0);
+ } else if (isLogicOp(OperandOpcode))
+ return computeLogicOpInGPR(Operand);
+ return SDValue();
+ };
+ LHS = getLogicOperand(LogicOp.getOperand(0));
+ RHS = getLogicOperand(LogicOp.getOperand(1));
+
+ // If a GPR sequence can't be produced for the LHS we can't proceed.
+ // Not producing a GPR sequence for the RHS is only a problem if this isn't
+ // a bitwise negation operation.
+ if (!LHS || (!RHS && !IsBitwiseNegation))
+ return SDValue();
+
+ NumLogicOpsOnComparison++;
+
+ // We will use the inputs as 64-bit values.
+ if (LHS.getValueType() == MVT::i32)
+ LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
+ if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
+ RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
+
+ unsigned NewOpc;
+ switch (LogicOp.getOpcode()) {
+ default: llvm_unreachable("Unknown logic operation.");
+ case ISD::AND: NewOpc = PPC::AND8; break;
+ case ISD::OR: NewOpc = PPC::OR8; break;
+ case ISD::XOR: NewOpc = PPC::XOR8; break;
+ }
+
+ if (IsBitwiseNegation) {
+ RHS = getI64Imm(1, dl);
+ NewOpc = PPC::XORI8;
+ }
+
+ return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
+
+}
+
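
A hypothetical source-level illustration (assuming a 64-bit PPC target) of
the shapes this path now handles without moving bits through the condition
register:

    // Each comparison is computed as a GPR setcc sequence; the logical op
    // on the i1 results then becomes AND8/OR8/XOR8 on 64-bit registers.
    int bothLess(long a, long b, long c, long d) {
      return (a < b) & (c < d); // zext (and (setcc ...) (setcc ...))
    }

    // Depending on earlier IR canonicalization, a negation like this can
    // reach ISel as (xor (setcc ...), -1) -- the special case above.
    int notEqual(long a, long b) {
      return !(a == b);
    }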
+/// Try performing logical operations on results of comparisons in GPRs.
+/// It is typically preferred from a performance perspective over performing
+/// the operations on individual bits in the CR. We only do this on 64-bit
+/// targets for now as the code is specialized for 64-bit (it uses 64-bit
+/// instructions and assumes 64-bit registers).
+bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) {
+ if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
+ return false;
+ if (N->getValueType(0) != MVT::i1)
+ return false;
+ assert(isLogicOp(N->getOpcode()) &&
+ "Expected a logic operation on setcc results.");
+ SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
+ if (!LoweredLogical)
+ return false;
+
+ SDLoc dl(N);
+ bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
+ unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
+ SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
+ SDValue LHS = LoweredLogical.getOperand(0);
+ SDValue RHS = LoweredLogical.getOperand(1);
+ SDValue WideOp;
+ SDValue OpToConvToRecForm;
+
+ // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode
+ // that is input to the XORI.
+ if (IsBitwiseNegate &&
+ LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
+ OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
+ else if (IsBitwiseNegate)
+ // If the input to the XORI isn't an extension, that's what we're after.
+ OpToConvToRecForm = LoweredLogical.getOperand(0);
+ else
+ // If this is not an XORI, it is a reg-reg logical op and we can convert it
+ // to record-form.
+ OpToConvToRecForm = LoweredLogical;
+
+ // Get the record-form version of the node from which we want to take the
+ // CR result.
+ uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
+ int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
+
+ // Convert the right node to record-form. This is either the logical op
+ // we're looking at, or the input node to the negation (if we're looking
+ // at a bitwise negation).
+ if (NewOpc != -1 && IsBitwiseNegate) {
+ // The input to the XORI has a record-form. Use it.
+ assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
+ "Expected a PPC::XORI8 only for bitwise negation.");
+ // Emit the record-form instruction.
+ std::vector<SDValue> Ops;
+ for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
+ Ops.push_back(OpToConvToRecForm.getOperand(i));
+
+ WideOp =
+ SDValue(CurDAG->getMachineNode(NewOpc, dl,
+ OpToConvToRecForm.getValueType(),
+ MVT::Glue, Ops), 0);
+ } else {
+ assert((NewOpc != -1 || !IsBitwiseNegate) &&
+ "No record form available for AND8/OR8/XOR8?");
+ WideOp =
+ SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
+ MVT::i64, MVT::Glue, LHS, RHS), 0);
+ }
+
+ // Select this node to a single bit from CR0 set by the record-form node
+ // just created. For bitwise negation, use the EQ bit which is the equivalent
+ // of negating the result (i.e. it is a bit set when the result of the
+ // operation is zero).
+ SDValue SRIdxVal =
+ CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
+ SDValue CRBit =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
+ MVT::i1, CR0Reg, SRIdxVal,
+ WideOp.getValue(1)), 0);
+ ReplaceNode(N, CRBit.getNode());
+ return true;
+}
+
/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
@@ -2677,6 +2853,77 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
}
}
+/// Produces a zero-extended result of comparing two 64-bit values according to
+/// the passed condition code.
+SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl) {
+ bool IsRHSZero = RHSValue == 0;
+ switch (CC) {
+ default: return SDValue();
+ case ISD::SETEQ: {
+ // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
+ // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+ SDValue Clz =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
+ getI64Imm(58, dl), getI64Imm(63, dl)),
+ 0);
+ }
+ }
+}
+
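
The SETEQ recipe relies on cntlzd being defined at zero. A short C++ sketch
verifying the arithmetic (the emulation helper is hypothetical; note that
__builtin_clzll would be undefined at zero, unlike the PPC instruction):

    #include <cstdint>

    // Emulates PPC cntlzd: counts leading zeros, returning 64 for x == 0.
    static unsigned cntlzd(uint64_t x) {
      unsigned N = 0;
      while (N < 64 && !((x >> (63 - N)) & 1))
        ++N;
      return N;
    }

    // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
    static uint64_t zextEq(uint64_t a, uint64_t b) {
      // a == b  =>  xor == 0  =>  cntlzd == 64  =>  64 >> 6 == 1
      // a != b  =>  cntlzd <= 63                =>  result  == 0
      return cntlzd(a ^ b) >> 6;
    }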
+/// Produces a sign-extended result of comparing two 64-bit values according to
+/// the passed condition code.
+SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl) {
+ bool IsRHSZero = RHSValue == 0;
+ switch (CC) {
+ default: return SDValue();
+ case ISD::SETEQ: {
+ // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
+ // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
+ // {addcz.reg, addcz.CA} = (addcarry %a, -1)
+ // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
+ SDValue AddInput = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+ SDValue Addic =
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
+ AddInput, getI32Imm(~0U, dl)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
+ Addic, Addic.getValue(1)), 0);
+ }
+ }
+}
+
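
The SETEQ recipe here is the classic carry trick: ADDIC computes x + (-1)
and sets CA exactly when x != 0 (the unsigned add wraps). SUBFE d, a, b
computes b + ~a + CA, so with both operands equal to the ADDIC result r it
yields r + ~r + CA = -1 + CA: all-ones (the sign-extended true) when the XOR
was zero, and 0 otherwise.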
+/// Does this SDValue have any uses for which keeping the value in a GPR is
+/// appropriate? This is meant to be used on values that have type i1, since
+/// it is somewhat meaningless to ask whether values of other types can be
+/// kept in GPRs.
+static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
+ assert(Compare.getOpcode() == ISD::SETCC &&
+ "An ISD::SETCC node required here.");
+
+ // For values that have a single use, the caller should obviously already have
+ // checked if that use is an extending use. We check the other uses here.
+ if (Compare.hasOneUse())
+ return true;
+ // We want the value in a GPR if it is being extended, used for a select, or
+ // used in logical operations.
+ for (auto CompareUse : Compare.getNode()->uses())
+ if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
+ CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
+ CompareUse->getOpcode() != ISD::SELECT &&
+ !isLogicOp(CompareUse->getOpcode())) {
+ OmittedForNonExtendUses++;
+ return false;
+ }
+ return true;
+}
+
/// Returns an equivalent of a SETCC node but with the result the same width as
/// the inputs. This can also be used for SELECT_CC if either the true or false
/// value is a power of two while the other is zero.
@@ -2686,6 +2933,11 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare,
Compare.getOpcode() == ISD::SELECT_CC) &&
"An ISD::SETCC node required here.");
+ // Don't convert this comparison to a GPR sequence because there are uses
+ // of the i1 result (i.e. uses that require the result in the CR).
+ if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
+ return SDValue();
+
SDValue LHS = Compare.getOperand(0);
SDValue RHS = Compare.getOperand(1);
@@ -2694,30 +2946,35 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare,
ISD::CondCode CC =
cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
EVT InputVT = LHS.getValueType();
- if (InputVT != MVT::i32)
+ if (InputVT != MVT::i32 && InputVT != MVT::i64)
return SDValue();
- SDLoc dl(Compare);
- ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
-
if (ConvOpts == SetccInGPROpts::ZExtInvert ||
ConvOpts == SetccInGPROpts::SExtInvert)
CC = ISD::getSetCCInverse(CC, true);
- if (ISD::isSignedIntSetCC(CC)) {
+ bool Inputs32Bit = InputVT == MVT::i32;
+ if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) {
LHS = signExtendInputIfNeeded(LHS);
RHS = signExtendInputIfNeeded(RHS);
- } else if (ISD::isUnsignedIntSetCC(CC)) {
+ } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) {
LHS = zeroExtendInputIfNeeded(LHS);
RHS = zeroExtendInputIfNeeded(RHS);
}
+ SDLoc dl(Compare);
+ ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+ int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
ConvOpts == SetccInGPROpts::SExtInvert;
- if (IsSext)
+
+ if (IsSext && Inputs32Bit)
return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
- return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
+ else if (Inputs32Bit)
+ return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
+ else if (IsSext)
+ return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
+ return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
}
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
@@ -2906,6 +3163,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::AND: {
+ if (tryLogicOpOfCompares(N))
+ return;
+
unsigned Imm, Imm2, SH, MB, ME;
uint64_t Imm64;
@@ -3025,6 +3285,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (tryBitfieldInsert(N))
return;
+ if (tryLogicOpOfCompares(N))
+ return;
+
short Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
@@ -3042,6 +3305,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
+ case ISD::XOR: {
+ if (tryLogicOpOfCompares(N))
+ return;
+ break;
+ }
case ISD::ADD: {
short Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 216efcc4a1ee8..41ff9d903aa07 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1041,6 +1041,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxStoresPerMemset = 128;
MaxStoresPerMemcpy = 128;
MaxStoresPerMemmove = 128;
+ MaxLoadsPerMemcmp = 128;
+ } else {
+ MaxLoadsPerMemcmp = 8;
+ MaxLoadsPerMemcmpOptSize = 4;
}
}
@@ -1112,6 +1116,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::XXINSERT: return "PPCISD::XXINSERT";
+ case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
@@ -1593,17 +1598,25 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
- // Check that the mask is shuffling words
-static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
- for (unsigned i = 0; i < 4; ++i) {
- unsigned B0 = N->getMaskElt(i*4);
- unsigned B1 = N->getMaskElt(i*4+1);
- unsigned B2 = N->getMaskElt(i*4+2);
- unsigned B3 = N->getMaskElt(i*4+3);
- if (B0 % 4)
- return false;
- if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
+// Check that the mask is shuffling N-byte elements.
+static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) {
+ assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
+ "Unexpected element width.");
+
+ unsigned NumOfElem = 16 / Width;
+ unsigned MaskVal[16]; // Width is never greater than 16
+ for (unsigned i = 0; i < NumOfElem; ++i) {
+ MaskVal[0] = N->getMaskElt(i * Width);
+ if (MaskVal[0] % Width) {
return false;
+ }
+
+ for (unsigned int j = 1; j < Width; ++j) {
+ MaskVal[j] = N->getMaskElt(i * Width + j);
+ if (MaskVal[j] != MaskVal[j-1] + 1) {
+ return false;
+ }
+ }
}
return true;
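// A standalone sketch (assumed test harness, not the LLVM entry point) of
// the predicate's contract for Width == 4: every word-sized group of byte
// indices must start on a word boundary and be consecutive.
#include <cassert>

static bool isWordMask(const int M[16]) {
  for (int i = 0; i < 16; i += 4)
    if (M[i] % 4 || M[i + 1] != M[i] + 1 ||
        M[i + 2] != M[i] + 2 || M[i + 3] != M[i] + 3)
      return false;
  return true;
}

int main() {
  int Good[16] = {4, 5, 6, 7, 0, 1, 2, 3, 20, 21, 22, 23, 28, 29, 30, 31};
  int Bad[16] = {1, 2, 3, 4, 0, 1, 2, 3, 20, 21, 22, 23, 28, 29, 30, 31};
  assert(isWordMask(Good) && !isWordMask(Bad));
  return 0;
}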
@@ -1611,7 +1624,7 @@ static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
- if (!isWordShuffleMask(N))
+ if (!isNByteElemShuffleMask(N, 4))
return false;
// Now we look at mask elements 0,4,8,12
@@ -1688,7 +1701,7 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the word is consecutive.
- if (!isWordShuffleMask(N))
+ if (!isNByteElemShuffleMask(N, 4))
return false;
// Now we look at mask elements 0,4,8,12, which are the beginning of words.
@@ -1746,6 +1759,66 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
}
}
+/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
+/// if the inputs to the instruction should be swapped and set \p DM to the
+/// value for the immediate.
+/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
+/// AND element 0 of the result comes from the first input (LE) or second input
+/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
+/// \return true iff the given mask of shuffle node \p N is an XXPERMDI shuffle
+/// mask.
+bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
+ bool &Swap, bool IsLE) {
+ assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+
+ // Ensure the byte indices of each doubleword are consecutive.
+ if (!isNByteElemShuffleMask(N, 8))
+ return false;
+
+ unsigned M0 = N->getMaskElt(0) / 8;
+ unsigned M1 = N->getMaskElt(8) / 8;
+ assert(((M0 | M1) < 4) && "A mask element out of bounds?");
+
+ // If both vector operands for the shuffle are the same vector, the mask will
+ // contain only elements from the first one and the second one will be undef.
+ if (N->getOperand(1).isUndef()) {
+ if ((M0 | M1) < 2) {
+ DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
+ Swap = false;
+ return true;
+ } else
+ return false;
+ }
+
+ if (IsLE) {
+ if (M0 > 1 && M1 < 2) {
+ Swap = false;
+ } else if (M0 < 2 && M1 > 1) {
+ M0 = (M0 + 2) % 4;
+ M1 = (M1 + 2) % 4;
+ Swap = true;
+ } else
+ return false;
+
+ // Note: if control reaches here, Swap has already been set above
+ DM = (((~M1) & 1) << 1) + ((~M0) & 1);
+ return true;
+ } else { // BE
+ if (M0 < 2 && M1 > 1) {
+ Swap = false;
+ } else if (M0 > 1 && M1 < 2) {
+ M0 = (M0 + 2) % 4;
+ M1 = (M1 + 2) % 4;
+ Swap = true;
+ } else
+ return false;
+
+ // Note: if control reaches here, Swap has already been set above
+ DM = (M0 << 1) + (M1 & 1);
+ return true;
+ }
+}
+
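// A worked example (host-side sketch, not the patch itself) of the
// little-endian single-input immediate computed above, where M0 and M1 are
// the doubleword indices selected by mask elements 0 and 8:
#include <cassert>

static unsigned permDIImmLE(unsigned M0, unsigned M1) {
  return (((~M1) & 1) << 1) + ((~M0) & 1);
}

int main() {
  // Swapping the two doublewords of one input (M0 = 1, M1 = 0) gives
  // DM = 2, matching the xxswapd idiom xxpermdi XT, XA, XA, 2.
  assert(permDIImmLE(1, 0) == 2);
  // The identity shuffle (M0 = 0, M1 = 1) gives DM = 1.
  assert(permDIImmLE(0, 1) == 1);
  return 0;
}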
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
@@ -7760,6 +7833,19 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
}
+ if (Subtarget.hasVSX() &&
+ PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
+ if (Swap)
+ std::swap(V1, V2);
+ SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
+ SDValue Conv2 =
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
+
+ SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
+ DAG.getConstant(ShiftElts, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
+ }
+
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 2f9eb95f6de68..7982a4a9e9fb7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -90,6 +90,10 @@ namespace llvm {
///
VECSHL,
+ /// XXPERMDI - The PPC XXPERMDI instruction
+ ///
+ XXPERMDI,
+
/// The CMPB instruction (takes two operands of i32 or i64).
CMPB,
@@ -454,6 +458,10 @@ namespace llvm {
/// for a XXSLDWI instruction.
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE);
+ /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXPERMDI instruction.
+ bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 165970f9678c3..295590b2acf62 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -735,12 +735,12 @@ def RLDICL_32_64 : MDForm_1<30, 0,
"rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
// End fast-isel.
-let isCodeGenOnly = 1 in
-def RLDICL_32 : MDForm_1<30, 0,
- (outs gprc:$rA),
- (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
- []>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm RLDICL_32 : MDForm_1r<30, 0,
+ (outs gprc:$rA),
+ (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
defm RLDICR : MDForm_1r<30, 1,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index fd6785e963a64..f3c68c443b1bc 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1983,3 +1983,7 @@ PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
return &PPC::VSRCRegClass;
return RC;
}
+
+int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
+ return PPC::getRecordFormOpcode(Opcode);
+}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index b30d09e03ec47..8dd4dbb608794 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -290,6 +290,7 @@ public:
return Reg >= PPC::V0 && Reg <= PPC::V31;
}
const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
+ static int getRecordFormOpcode(unsigned Opcode);
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 26b99eced23cb..8223aa655e38b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -53,6 +53,10 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
+def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>,
+ SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+]>;
+
def SDT_PPCvcmp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
]>;
@@ -170,6 +174,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>;
+def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 1589ab03e5075..c4139ca8b7bdb 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -843,7 +843,9 @@ let Uses = [RM] in {
def XXPERMDI : XX3Form_2<60, 10,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
- "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+ "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm,
+ [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
+ imm32SExt16:$DM))]>;
let isCodeGenOnly = 1 in
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
"xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index ddae5befee3e4..b9004cc8a9f54 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -296,7 +296,7 @@ namespace {
/// PPC Code Generator Pass Configuration Options.
class PPCPassConfig : public TargetPassConfig {
public:
- PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
+ PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
PPCTargetMachine &getPPCTargetMachine() const {
@@ -316,7 +316,7 @@ public:
} // end anonymous namespace
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new PPCPassConfig(this, PM);
+ return new PPCPassConfig(*this, PM);
}
void PPCPassConfig::addIRPasses() {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index f2838351cee56..b8f5a2083d808 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -55,6 +55,10 @@ public:
const Triple &TT = getTargetTriple();
return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
};
+
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
};
/// PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 7ee1317bf72f2..5559cdc5fe467 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -215,6 +215,11 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
return LoopHasReductions;
}
+bool PPCTTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
+ MaxLoadSize = 8;
+ return true;
+}
+
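// A conceptual illustration (assumed example, not the generated code) of
// what enabling memcmp expansion with MaxLoadSize = 8 permits: an equality
// test like memcmp(a, b, 16) == 0 becomes two 8-byte loads per buffer
// instead of a libcall.
#include <cassert>
#include <cstdint>
#include <cstring>

static bool eq16(const char *A, const char *B) {
  uint64_t A0, A1, B0, B1;
  std::memcpy(&A0, A, 8);
  std::memcpy(&A1, A + 8, 8);
  std::memcpy(&B0, B, 8);
  std::memcpy(&B1, B + 8, 8);
  return ((A0 ^ B0) | (A1 ^ B1)) == 0;
}

int main() {
  assert(eq16("0123456789abcdef", "0123456789abcdef"));
  assert(!eq16("0123456789abcdef", "0123456789abcdeX"));
  return 0;
}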
bool PPCTTIImpl::enableInterleavedAccessVectorization() {
return true;
}
@@ -239,9 +244,18 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
}
unsigned PPCTTIImpl::getCacheLineSize() {
- // This is currently only used for the data prefetch pass which is only
- // enabled for BG/Q by default.
- return CacheLineSize;
+ // Check first if the user specified a custom line size.
+ if (CacheLineSize.getNumOccurrences() > 0)
+ return CacheLineSize;
+
+ // On P7, P8 or P9 we have a cache line size of 128 bytes.
+ unsigned Directive = ST->getDarwinDirective();
+ if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
+ Directive == PPC::DIR_PWR9)
+ return 128;
+
+ // On other processors return a default of 64 bytes.
+ return 64;
}
unsigned PPCTTIImpl::getPrefetchDistance() {
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 6ce70fbd8778e..2e0116fee04c6 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -60,6 +60,7 @@ public:
/// @{
bool enableAggressiveInterleaving(bool LoopHasReductions);
+ bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp
index a20331cd0a3ed..efdde04c582d0 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -56,5 +56,5 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
}
TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new TargetPassConfig(this, PM);
+ return new TargetPassConfig(*this, PM);
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 1da4d36043045..49c67e0819f72 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -114,7 +114,7 @@ namespace {
/// Sparc Code Generator Pass Configuration Options.
class SparcPassConfig : public TargetPassConfig {
public:
- SparcPassConfig(SparcTargetMachine *TM, PassManagerBase &PM)
+ SparcPassConfig(SparcTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
SparcTargetMachine &getSparcTargetMachine() const {
@@ -128,7 +128,7 @@ public:
} // namespace
TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new SparcPassConfig(this, PM);
+ return new SparcPassConfig(*this, PM);
}
void SparcPassConfig::addIRPasses() {
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 48193fe095bed..faf714cbe2c98 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -40,6 +40,10 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
};
/// Sparc 32-bit target machine
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index ede5005fa4916..f30d52f859d75 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -119,7 +119,7 @@ namespace {
/// SystemZ Code Generator Pass Configuration Options.
class SystemZPassConfig : public TargetPassConfig {
public:
- SystemZPassConfig(SystemZTargetMachine *TM, PassManagerBase &PM)
+ SystemZPassConfig(SystemZTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
SystemZTargetMachine &getSystemZTargetMachine() const {
@@ -212,7 +212,7 @@ void SystemZPassConfig::addPreEmitPass() {
}
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new SystemZPassConfig(this, PM);
+ return new SystemZPassConfig(*this, PM);
}
TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() {
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index a10ca64fa6329..eb2f17a2091c3 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -51,6 +51,8 @@ public:
}
bool targetSchedulesPostRAScheduling() const override { return true; };
+
+ bool isMachineVerifierClean() const override { return false; }
};
} // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index b974681fb6afa..d9b2b8743649e 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -129,7 +129,7 @@ namespace {
/// WebAssembly Code Generator Pass Configuration Options.
class WebAssemblyPassConfig final : public TargetPassConfig {
public:
- WebAssemblyPassConfig(WebAssemblyTargetMachine *TM, PassManagerBase &PM)
+ WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const {
@@ -154,7 +154,7 @@ TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() {
TargetPassConfig *
WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new WebAssemblyPassConfig(this, PM);
+ return new WebAssemblyPassConfig(*this, PM);
}
FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 6e062ec59347b..b5a926f915afa 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -587,6 +587,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VPSLLDQZ256rr:
case X86::VPSLLDQZ512rr:
Src1Name = getRegName(MI->getOperand(1).getReg());
+ LLVM_FALLTHROUGH;
case X86::VPSLLDQZ128rm:
case X86::VPSLLDQZ256rm:
case X86::VPSLLDQZ512rm:
@@ -604,6 +605,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VPSRLDQZ256rr:
case X86::VPSRLDQZ512rr:
Src1Name = getRegName(MI->getOperand(1).getReg());
+ LLVM_FALLTHROUGH;
case X86::VPSRLDQZ128rm:
case X86::VPSRLDQZ256rm:
case X86::VPSRLDQZ512rm:
@@ -1091,6 +1093,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r)
CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m)
CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m)
DecodeSubVectorBroadcast(MVT::v8f32, MVT::v2f32, ShuffleMask);
@@ -1099,6 +1102,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r)
CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m)
CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m)
DecodeSubVectorBroadcast(MVT::v16f32, MVT::v2f32, ShuffleMask);
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 313920e02c3ed..5582526541bae 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -123,18 +123,26 @@ namespace {
EdgeBundles *Bundles;
// Return a bitmask of FP registers in block's live-in list.
- static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
unsigned Mask = 0;
- for (const auto &LI : MBB->liveins()) {
- if (LI.PhysReg < X86::FP0 || LI.PhysReg > X86::FP6)
- continue;
- Mask |= 1 << (LI.PhysReg - X86::FP0);
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin();
+ I != MBB->livein_end(); ) {
+ MCPhysReg Reg = I->PhysReg;
+ static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
+ if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ Mask |= 1 << (Reg - X86::FP0);
+ if (RemoveFPs) {
+ I = MBB->removeLiveIn(I);
+ continue;
+ }
+ }
+ ++I;
}
return Mask;
}
// Partition all the CFG edges into LiveBundles.
- void bundleCFG(MachineFunction &MF);
+ void bundleCFGRecomputeKillFlags(MachineFunction &MF);
MachineBasicBlock *MBB; // Current basic block
@@ -327,7 +335,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
// Prepare cross-MBB liveness.
- bundleCFG(MF);
+ bundleCFGRecomputeKillFlags(MF);
StackTop = 0;
@@ -375,13 +383,15 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
/// registers live-out from a block is identical to the live-in set of all
/// successors. This is not enforced by the normal live-in lists since
/// registers may be implicitly defined, or not used by all successors.
-void FPS::bundleCFG(MachineFunction &MF) {
+void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
assert(LiveBundles.empty() && "Stale data in LiveBundles");
LiveBundles.resize(Bundles->getNumBundles());
// Gather the actual live-in masks for all MBBs.
for (MachineBasicBlock &MBB : MF) {
- const unsigned Mask = calcLiveInMask(&MBB);
+ setKillFlags(MBB);
+
+ const unsigned Mask = calcLiveInMask(&MBB, false);
if (!Mask)
continue;
// Update MBB ingoing bundle mask.
@@ -396,7 +406,6 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
bool Changed = false;
MBB = &BB;
- setKillFlags(BB);
setupBlockStack();
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
@@ -453,6 +462,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
unsigned Reg = DeadRegs[i];
// Check if Reg is live on the stack. An inline-asm register operand that
// is in the clobber list and marked dead might not be live on the stack.
+ static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
freeStackSlotAfter(I, Reg-X86::FP0);
@@ -506,7 +516,6 @@ void FPS::setupBlockStack() {
// Push the fixed live-in registers.
for (unsigned i = Bundle.FixCount; i > 0; --i) {
- MBB->addLiveIn(X86::ST0+i-1);
DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
<< unsigned(Bundle.FixStack[i-1]) << '\n');
pushReg(Bundle.FixStack[i-1]);
@@ -515,7 +524,8 @@ void FPS::setupBlockStack() {
// Kill off unwanted live-ins. This can happen with a critical edge.
// FIXME: We could keep these live registers around as zombies. They may need
// to be revived at the end of a short block. It might save a few instrs.
- adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
+ unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
+ adjustLiveRegs(Mask, MBB->begin());
DEBUG(MBB->dump());
}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 331e56976db7c..328a80304602f 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1062,6 +1062,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
if (HasFP) {
+ assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
+
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
// If required, include space for extra hidden slot for stashing base pointer.
@@ -1124,13 +1126,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
nullptr, DwarfFramePtr));
}
}
-
- // Mark the FramePtr as live-in in every block. Don't do this again for
- // funclet prologues.
- if (!IsFunclet) {
- for (MachineBasicBlock &EveryMBB : MF)
- EveryMBB.addLiveIn(MachineFramePtr);
- }
} else {
assert(!IsFunclet && "funclets without FPs not yet implemented");
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c899f0fd5100e..2a1633de0a239 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -418,8 +418,6 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
case X86ISD::XOR:
case X86ISD::OR:
case ISD::ADD:
- case ISD::ADDC:
- case ISD::ADDE:
case ISD::ADDCARRY:
case ISD::AND:
case ISD::OR:
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8d78308afe9df..0a41f35f93208 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1,3 +1,4 @@
+
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
@@ -80,12 +81,6 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
" of the loop header PC will be 0)."),
cl::Hidden);
-static cl::opt<bool> MulConstantOptimization(
- "mul-constant-optimization", cl::init(true),
- cl::desc("Replace 'mul x, Const' with more effective instructions like "
- "SHIFT, LEA, etc."),
- cl::Hidden);
-
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
@@ -317,16 +312,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UREM, VT, Expand);
}
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
- if (VT == MVT::i64 && !Subtarget.is64Bit())
- continue;
- // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
- setOperationAction(ISD::ADDC, VT, Custom);
- setOperationAction(ISD::ADDE, VT, Custom);
- setOperationAction(ISD::SUBC, VT, Custom);
- setOperationAction(ISD::SUBE, VT, Custom);
- }
-
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
@@ -428,7 +413,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
continue;
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::SETCCE, VT, Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
@@ -1583,6 +1567,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Support carry in as value rather than glue.
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
+ setOperationAction(ISD::SETCCCARRY, VT, Custom);
}
if (!Subtarget.is64Bit()) {
@@ -16304,6 +16289,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
case ISD::SHL:
if (Op.getNode()->getFlags().hasNoSignedWrap())
break;
+ LLVM_FALLTHROUGH;
default:
NeedOF = true;
break;
@@ -17167,17 +17153,17 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
- case ISD::SETNE: Invert = true;
+ case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
- case ISD::SETLT: Swap = true;
+ case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
- case ISD::SETGE: Swap = true;
+ case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETLE: Opc = X86ISD::PCMPGT;
Invert = true; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: Opc = X86ISD::PCMPGT;
FlipSigns = true; break;
- case ISD::SETUGE: Swap = true;
+ case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULE: Opc = X86ISD::PCMPGT;
FlipSigns = true; Invert = true; break;
}
@@ -17398,19 +17384,24 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SetCC;
}
-SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Carry = Op.getOperand(2);
SDValue Cond = Op.getOperand(3);
SDLoc DL(Op);
- assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
+ assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());
- assert(Carry.getOpcode() != ISD::CARRY_FALSE);
+ // Recreate the carry if needed.
+ EVT CarryVT = Carry.getValueType();
+ APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
+ Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
+ Carry, DAG.getConstant(NegOne, DL, CarryVT));
+
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
- SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry);
+ SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
SDValue SetCC = getSETCC(CC, Cmp.getValue(1), DL, DAG);
if (Op.getSimpleValueType() == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
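// A host-side sketch of why adding the all-ones constant recreates the
// flag: for a boolean carry value C in {0, 1}, the unsigned add C + ~0
// wraps exactly when C == 1, so the carry flag out of the X86ISD::ADD
// equals the incoming boolean. (__builtin_uadd_overflow is a GCC/Clang
// builtin used here only to model the flag.)
#include <cassert>

static bool carryOfAddAllOnes(unsigned C) {
  unsigned Sum;
  return __builtin_uadd_overflow(C, ~0U, &Sum); // CF of (C + -1)
}

int main() {
  assert(carryOfAddAllOnes(1) && !carryOfAddAllOnes(0));
  return 0;
}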
@@ -23269,32 +23260,6 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getNode()->getSimpleValueType(0);
-
- // Let legalize expand this if it isn't a legal type yet.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
- return SDValue();
-
- SDVTList VTs = DAG.getVTList(VT, MVT::i32);
-
- unsigned Opc;
- bool ExtraOp = false;
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid code");
- case ISD::ADDC: Opc = X86ISD::ADD; break;
- case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
- case ISD::SUBC: Opc = X86ISD::SUB; break;
- case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
- }
-
- if (!ExtraOp)
- return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
- Op.getOperand(1));
- return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
- Op.getOperand(1), Op.getOperand(2));
-}
-
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
MVT VT = N->getSimpleValueType(0);
@@ -23785,7 +23750,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::SETCCE: return LowerSETCCE(Op, DAG);
+ case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
@@ -23830,10 +23795,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
- case ISD::ADDC:
- case ISD::ADDE:
- case ISD::SUBC:
- case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
@@ -28946,12 +28907,118 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
+// Try to match patterns such as
+// (i16 bitcast (v16i1 x))
+// ->
+// (i16 movmsk (v16i8 sext (v16i1 x)))
+// before the illegal vector is scalarized on subtargets that don't have legal
+// vxi1 types.
+static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
+ const X86Subtarget &Subtarget) {
+ EVT VT = BitCast.getValueType();
+ SDValue N0 = BitCast.getOperand(0);
+ EVT VecVT = N0->getValueType(0);
+
+ if (!VT.isScalarInteger() || !VecVT.isSimple())
+ return SDValue();
+
+ // With AVX512 vxi1 types are legal and we prefer using k-regs.
+ // MOVMSK is supported in SSE2 or later.
+ if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
+ return SDValue();
+
+ // There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v2f64 and
+ // v4f64. So all legal 128-bit and 256-bit vectors are covered except for
+ // v8i16 and v16i16.
+ // For these two cases, we can shuffle the upper element bytes to a
+ // consecutive sequence at the start of the vector and treat the results as
+ // v16i8 or v32i8, and for v16i8 this is the preferable solution. However,
+ // for v16i16 this is not the case, because the shuffle is expensive, so we
+ // avoid sign-extending to this type entirely.
+ // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
+ // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
+ MVT SExtVT;
+ MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
+ switch (VecVT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i1:
+ SExtVT = MVT::v2i64;
+ FPCastVT = MVT::v2f64;
+ break;
+ case MVT::v4i1:
+ SExtVT = MVT::v4i32;
+ FPCastVT = MVT::v4f32;
+ // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)),
+ // sign-extend to a 256-bit operation to avoid truncation.
+ if (N0->getOpcode() == ISD::SETCC &&
+ N0->getOperand(0)->getValueType(0).is256BitVector() &&
+ Subtarget.hasInt256()) {
+ SExtVT = MVT::v4i64;
+ FPCastVT = MVT::v4f64;
+ }
+ break;
+ case MVT::v8i1:
+ SExtVT = MVT::v8i16;
+ // For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)),
+ // sign-extend to a 256-bit operation to match the compare.
+ // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
+ // 256-bit because the shuffle is cheaper than sign extending the result of
+ // the compare.
+ if (N0->getOpcode() == ISD::SETCC &&
+ N0->getOperand(0)->getValueType(0).is256BitVector() &&
+ Subtarget.hasInt256()) {
+ SExtVT = MVT::v8i32;
+ FPCastVT = MVT::v8f32;
+ }
+ break;
+ case MVT::v16i1:
+ SExtVT = MVT::v16i8;
+ // For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)),
+ // it is not profitable to sign-extend to 256-bit because this will
+ // require an extra cross-lane shuffle which is more expensive than
+ // truncating the result of the compare to 128 bits.
+ break;
+ case MVT::v32i1:
+ // TODO: Handle pre-AVX2 cases by splitting to two v16i1's.
+ if (!Subtarget.hasInt256())
+ return SDValue();
+ SExtVT = MVT::v32i8;
+ break;
+ }
+
+ SDLoc DL(BitCast);
+ SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT);
+ if (SExtVT == MVT::v8i16) {
+ V = DAG.getBitcast(MVT::v16i8, V);
+ V = DAG.getVectorShuffle(
+ MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8),
+ {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
+ } else
+ assert(SExtVT.getScalarType() != MVT::i16 &&
+ "Vectors of i16 must be shuffled");
+ if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ V = DAG.getBitcast(FPCastVT, V);
+ V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
+ return DAG.getZExtOrTrunc(V, DL, VT);
+}
+
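// An SSE2 host-side sketch (intrinsics from <emmintrin.h>, not the DAG
// combine itself) of the emitted pattern: a byte compare already produces
// the sign-extended v16i8 mask, and MOVMSK packs the sign bits into i16.
#include <emmintrin.h>

static int bitcastV16i1ToI16(__m128i A, __m128i B) {
  __m128i M = _mm_cmpeq_epi8(A, B);  // v16i8 lanes of 0x00 / 0xFF
  return _mm_movemask_epi8(M);       // one bit per lane, 16 bits total
}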
static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
+ // Try to match patterns such as
+ // (i16 bitcast (v16i1 x))
+ // ->
+ // (i16 movmsk (v16i8 sext (v16i1 x)))
+ // before the setcc result is scalarized on subtargets that don't have legal
+ // vxi1 types.
+ if (DCI.isBeforeLegalize())
+ if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
+ return V;
// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
@@ -29944,6 +30011,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
@@ -29974,6 +30042,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
@@ -30008,6 +30077,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
@@ -30036,6 +30106,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
@@ -30933,75 +31004,6 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
}
}
-static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
- EVT VT, SDLoc DL) {
-
- auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
- SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- DAG.getConstant(Mult, DL, VT));
- Result = DAG.getNode(ISD::SHL, DL, VT, Result,
- DAG.getConstant(Shift, DL, MVT::i8));
- Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
- Result);
- return Result;
- };
-
- auto combineMulMulAddOrSub = [&](bool isAdd) {
- SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- DAG.getConstant(9, DL, VT));
- Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
- Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
- Result);
- return Result;
- };
-
- switch (MulAmt) {
- default:
- break;
- case 11:
- // mul x, 11 => add ((shl (mul x, 5), 1), x)
- return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
- case 21:
- // mul x, 21 => add ((shl (mul x, 5), 2), x)
- return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
- case 22:
- // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
- return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
- combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
- case 19:
- // mul x, 19 => sub ((shl (mul x, 5), 2), x)
- return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
- case 13:
- // mul x, 13 => add ((shl (mul x, 3), 2), x)
- return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
- case 23:
- // mul x, 23 => sub ((shl (mul x, 3), 3), x)
- return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
- case 14:
- // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
- return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
- combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
- case 26:
- // mul x, 26 => sub ((mul (mul x, 9), 3), x)
- return combineMulMulAddOrSub(/*isAdd*/ false);
- case 28:
- // mul x, 28 => add ((mul (mul x, 9), 3), x)
- return combineMulMulAddOrSub(/*isAdd*/ true);
- case 29:
- // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
- return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
- combineMulMulAddOrSub(/*isAdd*/ true));
- case 30:
- // mul x, 30 => sub (sub ((shl x, 5), x), x)
- return DAG.getNode(
- ISD::SUB, DL, VT, N->getOperand(0),
- DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
- DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(5, DL, MVT::i8))));
- }
- return SDValue();
-}
-
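// The removed decompositions are plain algebraic identities; a quick
// host-side sanity check (not part of the patch) of two of them:
//   mul x, 11 => add ((shl (mul x, 5), 1), x)
//   mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (uint64_t X : {0ull, 1ull, 42ull, 123456789ull}) {
    assert(11 * X == ((X * 5) << 1) + X);
    assert(29 * X == ((X * 9) * 3) + X + X);
  }
  return 0;
}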
/// Optimize a single multiply with constant into two operations in order to
/// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
@@ -31011,8 +31013,6 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
- if (!MulConstantOptimization)
- return SDValue();
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
@@ -31068,8 +31068,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
- } else if (!Subtarget.slowLEA())
- NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
+ }
if (!NewMul) {
assert(MulAmt != 0 &&
@@ -34558,8 +34557,7 @@ static SDValue combineX86ADD(SDNode *N, SelectionDAG &DAG,
isOneConstant(Carry.getOperand(1))))
Carry = Carry.getOperand(0);
- if (Carry.getOpcode() == ISD::SETCC ||
- Carry.getOpcode() == X86ISD::SETCC ||
+ if (Carry.getOpcode() == X86ISD::SETCC ||
Carry.getOpcode() == X86ISD::SETCC_CARRY) {
if (Carry.getConstantOperandVal(0) == X86::COND_B)
return DCI.CombineTo(N, SDValue(N, 0), Carry.getOperand(1));
@@ -35126,7 +35124,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
- case ISD::BITCAST: return combineBitcast(N, DAG, Subtarget);
+ case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
@@ -35510,6 +35508,7 @@ TargetLowering::ConstraintWeight
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ LLVM_FALLTHROUGH;
case 'R':
case 'q':
case 'Q':
@@ -35861,6 +35860,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::GR64RegClass);
break;
}
+ LLVM_FALLTHROUGH;
// 32-bit fallthrough
case 'Q': // Q_REGS
if (VT == MVT::i32 || VT == MVT::f32)
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 18106c2eb3941..f51b6641db2fb 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1163,7 +1163,7 @@ namespace llvm {
SDValue LowerToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 33fbd41bb6318..0aee30081a35c 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -195,6 +195,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
// It's not always legal to reference the low 8-bit of the larger
// register in 32-bit mode.
return false;
+ LLVM_FALLTHROUGH;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 53a8e83b36fca..cb21f1bd7706f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -323,7 +323,7 @@ namespace {
/// X86 Code Generator Pass Configuration Options.
class X86PassConfig : public TargetPassConfig {
public:
- X86PassConfig(X86TargetMachine *TM, PassManagerBase &PM)
+ X86PassConfig(X86TargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
X86TargetMachine &getX86TargetMachine() const {
@@ -369,7 +369,7 @@ INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix",
"X86 Execution Dependency Fix", false, false)
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new X86PassConfig(this, PM);
+ return new X86PassConfig(*this, PM);
}
void X86PassConfig::addIRPasses() {
@@ -433,6 +433,7 @@ bool X86PassConfig::addPreISel() {
void X86PassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None) {
+ addPass(&LiveRangeShrinkID);
addPass(createX86FixupSetCC());
addPass(createX86OptimizeLEAs());
addPass(createX86CallFrameOptimization());
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index cf933f52604ef..1bf267d34ec2c 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -49,6 +49,10 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 2950e2efbea3e..1a1cbd4748887 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -54,7 +54,7 @@ namespace {
/// XCore Code Generator Pass Configuration Options.
class XCorePassConfig : public TargetPassConfig {
public:
- XCorePassConfig(XCoreTargetMachine *TM, PassManagerBase &PM)
+ XCorePassConfig(XCoreTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
XCoreTargetMachine &getXCoreTargetMachine() const {
@@ -70,7 +70,7 @@ public:
} // end anonymous namespace
TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new XCorePassConfig(this, PM);
+ return new XCorePassConfig(*this, PM);
}
void XCorePassConfig::addIRPasses() {