summary | refs | log | tree | commit | diff
path: root/test/CodeGen
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2016-02-13 14:57:10 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2016-02-13 14:57:10 +0000
commit a322a4af1fe8b989fe5d1bbc15de8736a26c03ca (patch)
tree 0daaa3c98a8029d259c5918dfa1c13c9d4fe7971 /test/CodeGen
parent aff3ef6f6ccad9bd4e082546f8161727c2019117 (diff)
Notes
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- test/CodeGen/AArch64/fp16-v4-instructions.ll 274
-rw-r--r-- test/CodeGen/AArch64/fp16-v8-instructions.ll 84
-rw-r--r-- test/CodeGen/AMDGPU/hsa-note-no-func.ll 2
-rw-r--r-- test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll 1
-rw-r--r-- test/CodeGen/AMDGPU/spill-scavenge-offset.ll 33
-rw-r--r-- test/CodeGen/ARM/shifter_operand.ll 17
-rw-r--r-- test/CodeGen/PowerPC/fast-isel-ret.ll 9
-rw-r--r-- test/CodeGen/PowerPC/inline-asm-s-modifier.ll 10
-rw-r--r-- test/CodeGen/PowerPC/pr26193.ll 9
-rw-r--r-- test/CodeGen/PowerPC/pr26356.ll 136
-rw-r--r-- test/CodeGen/PowerPC/pr26381.ll 8
-rw-r--r-- test/CodeGen/SystemZ/int-cmp-53.ll 26
-rw-r--r-- test/CodeGen/X86/avx512-gather-scatter-intrin.ll 63
-rw-r--r-- test/CodeGen/X86/setcc-lowering.ll 79
14 files changed, 730 insertions, 21 deletions
diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll
index f6e4bdf734599..b892f1902b03b 100644
--- a/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -267,4 +267,278 @@ define <4 x i16> @fptoui_i16(<4 x half> %a) #0 {
ret <4 x i16> %1
}
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_une:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, ne
+; CHECK-DAG: csel {{.*}}, wzr, ne
+; CHECK-DAG: csel {{.*}}, wzr, ne
+; CHECK-DAG: csel {{.*}}, wzr, ne
+define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp une <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_ueq:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, eq
+; CHECK-DAG: csel {{.*}}, wzr, eq
+; CHECK-DAG: csel {{.*}}, wzr, eq
+; CHECK-DAG: csel {{.*}}, wzr, eq
+; CHECK-DAG: csel {{.*}}, vs
+; CHECK-DAG: csel {{.*}}, vs
+; CHECK-DAG: csel {{.*}}, vs
+; CHECK-DAG: csel {{.*}}, vs
+define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp ueq <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_ugt:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, hi
+; CHECK-DAG: csel {{.*}}, wzr, hi
+; CHECK-DAG: csel {{.*}}, wzr, hi
+; CHECK-DAG: csel {{.*}}, wzr, hi
+define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp ugt <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_uge:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, pl
+; CHECK-DAG: csel {{.*}}, wzr, pl
+; CHECK-DAG: csel {{.*}}, wzr, pl
+; CHECK-DAG: csel {{.*}}, wzr, pl
+define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp uge <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_ult:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, lt
+; CHECK-DAG: csel {{.*}}, wzr, lt
+; CHECK-DAG: csel {{.*}}, wzr, lt
+; CHECK-DAG: csel {{.*}}, wzr, lt
+define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp ult <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_ule:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, le
+; CHECK-DAG: csel {{.*}}, wzr, le
+; CHECK-DAG: csel {{.*}}, wzr, le
+; CHECK-DAG: csel {{.*}}, wzr, le
+define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp ule <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_uno:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, vs
+; CHECK-DAG: csel {{.*}}, wzr, vs
+; CHECK-DAG: csel {{.*}}, wzr, vs
+; CHECK-DAG: csel {{.*}}, wzr, vs
+define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp uno <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_one:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, mi
+; CHECK-DAG: csel {{.*}}, wzr, mi
+; CHECK-DAG: csel {{.*}}, wzr, mi
+; CHECK-DAG: csel {{.*}}, wzr, mi
+; CHECK-DAG: csel {{.*}}, gt
+; CHECK-DAG: csel {{.*}}, gt
+; CHECK-DAG: csel {{.*}}, gt
+; CHECK-DAG: csel {{.*}}, gt
+define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp one <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_oeq:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, eq
+; CHECK-DAG: csel {{.*}}, wzr, eq
+; CHECK-DAG: csel {{.*}}, wzr, eq
+; CHECK-DAG: csel {{.*}}, wzr, eq
+define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp oeq <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_ogt:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, gt
+; CHECK-DAG: csel {{.*}}, wzr, gt
+; CHECK-DAG: csel {{.*}}, wzr, gt
+; CHECK-DAG: csel {{.*}}, wzr, gt
+define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp ogt <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_oge:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, ge
+; CHECK-DAG: csel {{.*}}, wzr, ge
+; CHECK-DAG: csel {{.*}}, wzr, ge
+; CHECK-DAG: csel {{.*}}, wzr, ge
+define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp oge <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_olt:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, mi
+; CHECK-DAG: csel {{.*}}, wzr, mi
+; CHECK-DAG: csel {{.*}}, wzr, mi
+; CHECK-DAG: csel {{.*}}, wzr, mi
+define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp olt <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_ole:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, ls
+; CHECK-DAG: csel {{.*}}, wzr, ls
+; CHECK-DAG: csel {{.*}}, wzr, ls
+; CHECK-DAG: csel {{.*}}, wzr, ls
+define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp ole <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: test_fcmp_ord:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: csel {{.*}}, wzr, vc
+; CHECK-DAG: csel {{.*}}, wzr, vc
+; CHECK-DAG: csel {{.*}}, wzr, vc
+; CHECK-DAG: csel {{.*}}, wzr, vc
+define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 {
+ %1 = fcmp ord <4 x half> %a, %b
+ ret <4 x i1> %1
+}
+
attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 137d1f358a304..2f70f3635d195 100644
--- a/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -421,4 +421,88 @@ define <8 x i16> @fptoui_i16(<8 x half> %a) #0 {
ret <8 x i16> %1
}
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_une(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp une <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 16 csel tests. Skipped.
+define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp ueq <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_ugt(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp ugt <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_uge(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp uge <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_ult(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp ult <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_ule(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp ule <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp uno <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 16 csel tests. Skipped.
+define <8 x i1> @test_fcmp_one(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp one <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_oeq(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp oeq <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_ogt(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp ogt <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_oge(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp oge <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_olt(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp olt <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_ole(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp ole <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
+; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
+define <8 x i1> @test_fcmp_ord(<8 x half> %a, <8 x half> %b) #0 {
+ %1 = fcmp ord <8 x half> %a, %b
+ ret <8 x i1> %1
+}
+
attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/test/CodeGen/AMDGPU/hsa-note-no-func.ll
index 0e4662231b4fb..f82e98e79545f 100644
--- a/test/CodeGen/AMDGPU/hsa-note-no-func.ll
+++ b/test/CodeGen/AMDGPU/hsa-note-no-func.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA --check-prefix=HSA-CI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA --check-prefix=HSA-VI %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji | FileCheck --check-prefix=HSA --check-prefix=HSA-FIJI %s
; HSA: .hsa_code_object_version 1,0
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
+; HSA-FIJI: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
diff --git a/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll b/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
index 3d05da616e4ee..fdc3240870150 100644
--- a/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
+++ b/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
@@ -1,5 +1,6 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
;RUN: llc < %s -march=amdgcn -mcpu=kabini -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=16BANK %s
+;RUN: llc < %s -march=amdgcn -mcpu=stoney -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=16BANK %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
;GCN-LABEL: {{^}}main:
diff --git a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
new file mode 100644
index 0000000000000..4a12ed545b81f
--- /dev/null
+++ b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=amdgcn -mcpu=verde < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck %s
+
+; When the offset of VGPR spills into scratch space gets too large, an additional SGPR
+; is used to calculate the scratch load/store address. Make sure that this
+; mechanism works even when many spills happen.
+
+; Just test that it compiles successfully.
+; CHECK-LABEL: test
+define void @test(<1280 x i32> addrspace(1)* %out, <1280 x i32> addrspace(1)* %in,
+ <96 x i32> addrspace(1)* %sdata_out, <96 x i32> %sdata_in) {
+entry: ; load %in[tid], clobber VGPRs to raise register pressure, then store to %out[tid]
+ %tid = call i32 @llvm.SI.tid() nounwind readnone ; index used for both the load and the store address
+
+ %aptr = getelementptr <1280 x i32>, <1280 x i32> addrspace(1)* %in, i32 %tid
+ %a = load <1280 x i32>, <1280 x i32> addrspace(1)* %aptr
+
+; mark most VGPR registers as used to increase register pressure (clobbers every fourth VGPR, VGPR4 through VGPR224)
+ call void asm sideeffect "", "~{VGPR4},~{VGPR8},~{VGPR12},~{VGPR16},~{VGPR20},~{VGPR24},~{VGPR28},~{VGPR32}" ()
+ call void asm sideeffect "", "~{VGPR36},~{VGPR40},~{VGPR44},~{VGPR48},~{VGPR52},~{VGPR56},~{VGPR60},~{VGPR64}" ()
+ call void asm sideeffect "", "~{VGPR68},~{VGPR72},~{VGPR76},~{VGPR80},~{VGPR84},~{VGPR88},~{VGPR92},~{VGPR96}" ()
+ call void asm sideeffect "", "~{VGPR100},~{VGPR104},~{VGPR108},~{VGPR112},~{VGPR116},~{VGPR120},~{VGPR124},~{VGPR128}" ()
+ call void asm sideeffect "", "~{VGPR132},~{VGPR136},~{VGPR140},~{VGPR144},~{VGPR148},~{VGPR152},~{VGPR156},~{VGPR160}" ()
+ call void asm sideeffect "", "~{VGPR164},~{VGPR168},~{VGPR172},~{VGPR176},~{VGPR180},~{VGPR184},~{VGPR188},~{VGPR192}" ()
+ call void asm sideeffect "", "~{VGPR196},~{VGPR200},~{VGPR204},~{VGPR208},~{VGPR212},~{VGPR216},~{VGPR220},~{VGPR224}" ()
+
+ %outptr = getelementptr <1280 x i32>, <1280 x i32> addrspace(1)* %out, i32 %tid ; address into %out, not %in, so the output argument is actually used
+ store <1280 x i32> %a, <1280 x i32> addrspace(1)* %outptr
+
+ ret void
+}
+
+declare i32 @llvm.SI.tid() nounwind readnone
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 5d44eb0f11d12..e5f9b118527a7 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -239,3 +239,20 @@ define void @test_well_formed_dag(i32 %in1, i32 %in2, i32* %addr) {
store i32 %add, i32* %addr
ret void
}
+
+define { i32, i32 } @test_multi_use_add(i32 %base, i32 %offset) {
+; CHECK-LABEL: test_multi_use_add:
+; CHECK-THUMB: movs [[CONST:r[0-9]+]], #28
+; CHECK-THUMB: movt [[CONST]], #1
+
+ %prod = mul i32 %offset, 65564
+ %sum = add i32 %base, %prod
+
+ %ptr = inttoptr i32 %sum to i32*
+ %loaded = load i32, i32* %ptr
+
+ %ret.tmp = insertvalue { i32, i32 } undef, i32 %sum, 0
+ %ret = insertvalue { i32, i32 } %ret.tmp, i32 %loaded, 1
+
+ ret { i32, i32 } %ret
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll
index e05ef7d9ab824..0adb5a9351090 100644
--- a/test/CodeGen/PowerPC/fast-isel-ret.ll
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@@ -186,3 +186,12 @@ entry:
; ELF64: blr
ret i32 -1
}
+
+define zeroext i16 @ret20() nounwind {
+entry:
+; ELF64-LABEL: ret20
+; ELF64: lis{{.*}}0
+; ELF64: ori{{.*}}32768
+; ELF64: blr
+ ret i16 32768
+}
diff --git a/test/CodeGen/PowerPC/inline-asm-s-modifier.ll b/test/CodeGen/PowerPC/inline-asm-s-modifier.ll
new file mode 100644
index 0000000000000..c8b00b6deb6eb
--- /dev/null
+++ b/test/CodeGen/PowerPC/inline-asm-s-modifier.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+define void @test() {
+entry:
+ call void asm sideeffect "mtfsb1 ${0:s}", "i"(i32 7), !srcloc !1
+ ret void
+}
+; CHECK: #APP
+; CHECK-NEXT: mtfsb1 25
+
+!1 = !{i32 40}
diff --git a/test/CodeGen/PowerPC/pr26193.ll b/test/CodeGen/PowerPC/pr26193.ll
new file mode 100644
index 0000000000000..acd99bc0331a3
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr26193.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+define <8 x i16> @test(<4 x i32> %a) {
+entry:
+ %0 = tail call <8 x i16> @llvm.ppc.altivec.vpkswss(<4 x i32> %a, <4 x i32> %a)
+ ret <8 x i16> %0
+}
+; CHECK: vpkswss 2,
+
+declare <8 x i16> @llvm.ppc.altivec.vpkswss(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/PowerPC/pr26356.ll b/test/CodeGen/PowerPC/pr26356.ll
new file mode 100644
index 0000000000000..0f5d877b57644
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr26356.ll
@@ -0,0 +1,136 @@
+; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+define zeroext i32 @f1() {
+entry:
+ ret i32 65535
+}
+; CHECK-LABEL: @f1
+; CHECK: lis 3, 0
+; CHECK: ori 3, 3, 65535
+
+define zeroext i32 @f2() {
+entry:
+ ret i32 32768
+}
+; CHECK-LABEL: @f2
+; CHECK: lis 3, 0
+; CHECK: ori 3, 3, 32768
+
+define zeroext i32 @f3() {
+entry:
+ ret i32 32767
+}
+; CHECK-LABEL: @f3
+; CHECK: li 3, 32767
+
+define zeroext i16 @f4() {
+entry:
+ ret i16 65535
+}
+; CHECK-LABEL: @f4
+; CHECK: lis 3, 0
+; CHECK: ori 3, 3, 65535
+
+define zeroext i16 @f5() {
+entry:
+ ret i16 32768
+}
+; CHECK-LABEL: @f5
+; CHECK: lis 3, 0
+; CHECK: ori 3, 3, 32768
+
+define zeroext i16 @f6() {
+entry:
+ ret i16 32767
+}
+; CHECK-LABEL: @f6
+; CHECK: li 3, 32767
+
+define zeroext i16 @f7() {
+entry:
+ ret i16 -1
+}
+; CHECK-LABEL: @f7
+; CHECK: lis 3, 0
+; CHECK: ori 3, 3, 65535
+
+define zeroext i16 @f8() {
+entry:
+ ret i16 -32768
+}
+; CHECK-LABEL: @f8
+; CHECK: lis 3, 0
+; CHECK: ori 3, 3, 32768
+
+define signext i32 @f1s() {
+entry:
+ ret i32 65535
+}
+; CHECK-LABEL: @f1s
+; CHECK: lis 3, 0
+; CHECK: ori 3, 3, 65535
+
+define signext i32 @f2s() {
+entry:
+ ret i32 32768
+}
+; CHECK-LABEL: @f2s
+; CHECK: lis 3, 0
+; CHECK: ori 3, 3, 32768
+
+define signext i32 @f3s() {
+entry:
+ ret i32 32767
+}
+; CHECK-LABEL: @f3s
+; CHECK: li 3, 32767
+
+define signext i16 @f4s() {
+entry:
+ ret i16 32767
+}
+; CHECK-LABEL: @f4s
+; CHECK: li 3, 32767
+
+define signext i32 @f1sn() {
+entry:
+ ret i32 -65535
+}
+; CHECK-LABEL: @f1sn
+; CHECK: lis 3, -1
+; CHECK: ori 3, 3, 1
+
+define signext i32 @f2sn() {
+entry:
+ ret i32 -32768
+}
+; CHECK-LABEL: @f2sn
+; CHECK: li 3, -32768
+
+define signext i32 @f3sn() {
+entry:
+ ret i32 -32767
+}
+; CHECK-LABEL: @f3sn
+; CHECK: li 3, -32767
+
+define signext i32 @f4sn() {
+entry:
+ ret i32 -65536
+}
+; CHECK-LABEL: @f4sn
+; CHECK: lis 3, -1
+
+define signext i16 @f5sn() {
+entry:
+ ret i16 -32767
+}
+; CHECK-LABEL: @f5sn
+; CHECK: li 3, -32767
+
+define signext i16 @f6sn() {
+entry:
+ ret i16 -32768
+}
+; CHECK-LABEL: @f6sn
+; CHECK: li 3, -32768
diff --git a/test/CodeGen/PowerPC/pr26381.ll b/test/CodeGen/PowerPC/pr26381.ll
new file mode 100644
index 0000000000000..a45288e545f2f
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr26381.ll
@@ -0,0 +1,8 @@
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64le-unknown-unknown -O0 < %s | FileCheck %s
+
+define internal signext i32 @foo() { ; no attribute-group reference: this file defines no attribute groups
+ ret i32 -125452974 ; 0xF885BD52 as i32: high half 0xF885 = -1915 (signed 16-bit), low half 0xBD52 = 48466
+}
+
+; CHECK: lis 3, -1915
+; CHECK: ori 3, 3, 48466
diff --git a/test/CodeGen/SystemZ/int-cmp-53.ll b/test/CodeGen/SystemZ/int-cmp-53.ll
new file mode 100644
index 0000000000000..b7d985eeefe7c
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-53.ll
@@ -0,0 +1,26 @@
+; This used to incorrectly use a TMLL for an always-false test at -O0.
+;
+; RUN: llc -O0 < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @test(i8 *%input, i32 *%result) {
+entry:
+; CHECK-NOT: tmll
+
+ %0 = load i8, i8* %input, align 1
+ %1 = trunc i8 %0 to i1
+ %2 = zext i1 %1 to i32
+ %3 = icmp sge i32 %2, 0
+ br i1 %3, label %if.then, label %if.else
+
+if.then:
+ store i32 1, i32* %result, align 4
+ br label %return
+
+if.else:
+ store i32 0, i32* %result, align 4
+ br label %return
+
+return:
+ ret void
+}
+
diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index 3bc67cceaab5d..9ba18192f5d29 100644
--- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -259,18 +259,22 @@ define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovb %eax, %k1
; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
+; CHECK-NEXT: movb $120, %al
+; CHECK-NEXT: kmovb %eax, %k1
; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
- call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
- call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
- call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
+ call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, i8* %base, i32 4, i32 1)
+ call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
+ call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, i8* %base, i32 2, i32 1)
ret void
}
-
declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
@@ -790,3 +794,54 @@ define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <
ret void
}
+define void @scatter_mask_test(i8* %x0, <8 x i32> %x2, <8 x i32> %x3) {
+; CHECK-LABEL: scatter_mask_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: kxorw %k0, %k0, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovb %eax, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: movb $96, %al
+; CHECK-NEXT: kmovb %eax, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 0, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 96, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ ret void
+}
+
+define <16 x float> @gather_mask_test(<16 x i32> %ind, <16 x float> %src, i8* %base) {
+; CHECK-LABEL: gather_mask_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm2
+; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1}
+; CHECK-NEXT: kxorw %k0, %k0, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
+; CHECK-NEXT: movw $1, %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm4 {%k1}
+; CHECK-NEXT: movw $220, %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm0
+; CHECK-NEXT: vaddps %zmm4, %zmm1, %zmm1
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 -1, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 0, i32 4)
+ %res2 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 1, i32 4)
+ %res3 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 220, i32 4)
+
+ %res4 = fadd <16 x float> %res, %res1
+ %res5 = fadd <16 x float> %res3, %res2
+ %res6 = fadd <16 x float> %res5, %res4
+ ret <16 x float> %res6
+}
diff --git a/test/CodeGen/X86/setcc-lowering.ll b/test/CodeGen/X86/setcc-lowering.ll
index 77739e72fcc80..91b42bd67767c 100644
--- a/test/CodeGen/X86/setcc-lowering.ll
+++ b/test/CodeGen/X86/setcc-lowering.ll
@@ -1,26 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
+; RUN: llc -mtriple=i386-unknown-linux-gnu -mcpu=knl < %s | FileCheck %s --check-prefix=KNL-32
+
; Verify that we don't crash during codegen due to a wrong lowering
; of a setcc node with illegal operand types and return type.
define <8 x i16> @pr25080(<8 x i32> %a) {
-; CHECK-LABEL: pr25080:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; CHECK-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; CHECK-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
-; CHECK-NEXT: vpsraw $15, %xmm0, %xmm0
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; AVX-LABEL: pr25080:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
entry:
%0 = trunc <8 x i32> %a to <8 x i23>
%1 = icmp eq <8 x i23> %0, zeroinitializer
@@ -28,3 +30,46 @@ entry:
%3 = sext <8 x i1> %2 to <8 x i16>
ret <8 x i16> %3
}
+
+define void @pr26232(i64 %a) {
+; KNL-32-LABEL: pr26232:
+; KNL-32: # BB#0: # %for_test11.preheader
+; KNL-32-NEXT: pushl %esi
+; KNL-32-NEXT: .Ltmp0:
+; KNL-32-NEXT: .cfi_def_cfa_offset 8
+; KNL-32-NEXT: .Ltmp1:
+; KNL-32-NEXT: .cfi_offset %esi, -8
+; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; KNL-32-NEXT: movw $-1, %dx
+; KNL-32-NEXT: .align 16, 0x90
+; KNL-32-NEXT: .LBB1_1: # %for_loop599
+; KNL-32-NEXT: # =>This Inner Loop Header: Depth=1
+; KNL-32-NEXT: cmpl $65536, %ecx # imm = 0x10000
+; KNL-32-NEXT: movl %eax, %esi
+; KNL-32-NEXT: sbbl $0, %esi
+; KNL-32-NEXT: movl $0, %esi
+; KNL-32-NEXT: cmovlw %dx, %si
+; KNL-32-NEXT: testw %si, %si
+; KNL-32-NEXT: jne .LBB1_1
+; KNL-32-NEXT: # BB#2: # %for_exit600
+; KNL-32-NEXT: popl %esi
+; KNL-32-NEXT: retl
+allocas:
+ br label %for_test11.preheader
+
+for_test11.preheader: ; preds = %for_test11.preheader, %allocas
+ br i1 undef, label %for_loop599, label %for_test11.preheader
+
+for_loop599: ; preds = %for_loop599, %for_test11.preheader
+ %less_i_load605_ = icmp slt i64 %a, 65536
+ %less_i_load605__broadcast_init = insertelement <16 x i1> undef, i1 %less_i_load605_, i32 0
+ %less_i_load605__broadcast = shufflevector <16 x i1> %less_i_load605__broadcast_init, <16 x i1> undef, <16 x i32> zeroinitializer
+ %"oldMask&test607" = and <16 x i1> %less_i_load605__broadcast, undef
+ %intmask.i894 = bitcast <16 x i1> %"oldMask&test607" to i16
+ %res.i895 = icmp eq i16 %intmask.i894, 0
+ br i1 %res.i895, label %for_exit600, label %for_loop599
+
+for_exit600: ; preds = %for_loop599
+ ret void
+}