summaryrefslogtreecommitdiff
path: root/test/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen')
-rw-r--r--test/CodeGen/AArch64/aarch64-vcvtfp2fxs-combine.ll24
-rw-r--r--test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll246
-rw-r--r--test/CodeGen/AMDGPU/amdgpu.private-memory.ll35
-rw-r--r--test/CodeGen/AMDGPU/basic-branch.ll1
-rw-r--r--test/CodeGen/AMDGPU/fdiv.ll251
-rw-r--r--test/CodeGen/AMDGPU/fp_to_sint.f64.ll23
-rw-r--r--test/CodeGen/AMDGPU/fp_to_sint.ll28
-rw-r--r--test/CodeGen/AMDGPU/fp_to_uint.f64.ll23
-rw-r--r--test/CodeGen/AMDGPU/fp_to_uint.ll30
-rw-r--r--test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll8
-rw-r--r--test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll9
-rw-r--r--test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll9
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll18
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll (renamed from test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticgroup.ll)24
-rw-r--r--test/CodeGen/AMDGPU/rcp-pattern.ll91
-rw-r--r--test/CodeGen/AMDGPU/reciprocal.ll13
-rw-r--r--test/CodeGen/AMDGPU/skip-if-dead.ll2
-rw-r--r--test/CodeGen/AMDGPU/vector-alloca.ll119
-rw-r--r--test/CodeGen/AMDGPU/wqm.ll154
-rw-r--r--test/CodeGen/ARM/arm-and-tst-peephole.ll2
-rw-r--r--test/CodeGen/ARM/ssat-v4t.ll9
-rw-r--r--test/CodeGen/ARM/ssat.ll30
-rw-r--r--test/CodeGen/ARM/usat-v4t.ll9
-rw-r--r--test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll16
-rw-r--r--test/CodeGen/Mips/2010-07-20-Switch.ll6
-rw-r--r--test/CodeGen/Mips/analyzebranch.ll6
-rw-r--r--test/CodeGen/Mips/atomic.ll138
-rw-r--r--test/CodeGen/Mips/blez_bgez.ll4
-rw-r--r--test/CodeGen/Mips/blockaddr.ll32
-rw-r--r--test/CodeGen/Mips/ehframe-indirect.ll12
-rw-r--r--test/CodeGen/Mips/fcmp.ll16
-rw-r--r--test/CodeGen/Mips/fpbr.ll30
-rw-r--r--test/CodeGen/Mips/jumptable_labels.ll75
-rw-r--r--test/CodeGen/Mips/llvm-ir/ashr.ll8
-rw-r--r--test/CodeGen/Mips/llvm-ir/indirectbr.ll4
-rw-r--r--test/CodeGen/Mips/llvm-ir/lshr.ll8
-rw-r--r--test/CodeGen/Mips/llvm-ir/select-dbl.ll38
-rw-r--r--test/CodeGen/Mips/llvm-ir/select-flt.ll40
-rw-r--r--test/CodeGen/Mips/llvm-ir/select-int.ll27
-rw-r--r--test/CodeGen/Mips/llvm-ir/shl.ll8
-rw-r--r--test/CodeGen/Mips/longbranch.ll14
-rw-r--r--test/CodeGen/Mips/msa/basic_operations.ll40
-rw-r--r--test/CodeGen/Mips/msa/basic_operations_float.ll36
-rw-r--r--test/CodeGen/Mips/octeon.ll16
-rw-r--r--test/CodeGen/X86/avx-intrinsics-fast-isel.ll6
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll25
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll37
-rw-r--r--test/CodeGen/X86/avx512-cvt.ll347
-rw-r--r--test/CodeGen/X86/pr28504.ll37
-rw-r--r--test/CodeGen/X86/pr28824.ll23
-rw-r--r--test/CodeGen/X86/sse-intrinsics-fast-isel-x86_64.ll11
-rw-r--r--test/CodeGen/X86/sse-intrinsics-fast-isel.ll18
-rw-r--r--test/CodeGen/X86/sse2-intrinsics-fast-isel-x86_64.ll11
-rw-r--r--test/CodeGen/X86/sse2-intrinsics-fast-isel.ll40
-rw-r--r--test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll13
-rw-r--r--test/CodeGen/X86/sse2-intrinsics-x86.ll56
-rw-r--r--test/CodeGen/X86/tail-merge-after-mbp.ll94
-rw-r--r--test/CodeGen/X86/twoaddr-lea.ll57
58 files changed, 1940 insertions, 567 deletions
diff --git a/test/CodeGen/AArch64/aarch64-vcvtfp2fxs-combine.ll b/test/CodeGen/AArch64/aarch64-vcvtfp2fxs-combine.ll
new file mode 100644
index 000000000000..a71b5e86138d
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-vcvtfp2fxs-combine.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=aarch64-linux-eabi -o - | FileCheck %s
+
+%struct.a= type { i64, i64, i64, i64 }
+
+; DAG combine will try to perform a transformation that creates a vcvtfp2fxs
+; with a v4f64 input. Since v4i64 is not legal we should bail out. We can
+; pottentially still create the vcvtfp2fxs node after legalization (but on a
+; v2f64).
+
+; CHECK-LABEL: fun1
+define void @fun1() local_unnamed_addr {
+entry:
+ %mul = fmul <4 x double> zeroinitializer, <double 6.553600e+04, double 6.553600e+04, double 6.553600e+04, double 6.553600e+04>
+ %toi = fptosi <4 x double> %mul to <4 x i64>
+ %ptr = getelementptr inbounds %struct.a, %struct.a* undef, i64 0, i32 2
+ %elem = extractelement <4 x i64> %toi, i32 1
+ store i64 %elem, i64* %ptr, align 8
+ call void @llvm.trap()
+ unreachable
+}
+
+; Function Attrs: noreturn nounwind
+declare void @llvm.trap()
+
diff --git a/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll b/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll
index a12132f425d9..d78c75165be2 100644
--- a/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll
+++ b/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll
@@ -1,8 +1,246 @@
-; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare < %s | FileCheck %s
-; RUN: opt -S -amdgpu-codegenprepare < %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s
+; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s
; Make sure this doesn't crash with no triple
-; CHECK-LABEL: @foo(
-define void @foo() {
+; NOOP-LABEL: @noop_fdiv_fpmath(
+; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
+define void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
+ %md.25ulp = fdiv float %a, %b, !fpmath !0
+ store volatile float %md.25ulp, float addrspace(1)* %out
ret void
}
+
+; CHECK-LABEL: @fdiv_fpmath(
+; CHECK: %no.md = fdiv float %a, %b{{$}}
+; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
+; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
+; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
+; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
+; CHECK: %fast.md.25ulp = call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
+; CHECK: arcp.md.25ulp = call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
+define void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
+ %no.md = fdiv float %a, %b
+ store volatile float %no.md, float addrspace(1)* %out
+
+ %md.half.ulp = fdiv float %a, %b, !fpmath !1
+ store volatile float %md.half.ulp, float addrspace(1)* %out
+
+ %md.1ulp = fdiv float %a, %b, !fpmath !2
+ store volatile float %md.1ulp, float addrspace(1)* %out
+
+ %md.25ulp = fdiv float %a, %b, !fpmath !0
+ store volatile float %md.25ulp, float addrspace(1)* %out
+
+ %md.3ulp = fdiv float %a, %b, !fpmath !3
+ store volatile float %md.3ulp, float addrspace(1)* %out
+
+ %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
+ store volatile float %fast.md.25ulp, float addrspace(1)* %out
+
+ %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
+ store volatile float %arcp.md.25ulp, float addrspace(1)* %out
+
+ ret void
+}
+
+; CHECK-LABEL: @rcp_fdiv_fpmath(
+; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
+; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
+; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
+; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}}
+; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
+; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}}
+; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
+define void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
+ %no.md = fdiv float 1.0, %x
+ store volatile float %no.md, float addrspace(1)* %out
+
+ %md.25ulp = fdiv float 1.0, %x, !fpmath !0
+ store volatile float %md.25ulp, float addrspace(1)* %out
+
+ %md.half.ulp = fdiv float 1.0, %x, !fpmath !1
+ store volatile float %md.half.ulp, float addrspace(1)* %out
+
+ %arcp.no.md = fdiv arcp float 1.0, %x
+ store volatile float %arcp.no.md, float addrspace(1)* %out
+
+ %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0
+ store volatile float %arcp.25ulp, float addrspace(1)* %out
+
+ %fast.no.md = fdiv fast float 1.0, %x
+ store volatile float %fast.no.md, float addrspace(1)* %out
+
+ %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
+ store volatile float %fast.25ulp, float addrspace(1)* %out
+
+ ret void
+}
+
+; CHECK-LABEL: @fdiv_fpmath_vector(
+; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}}
+; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
+; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
+
+; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
+; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
+; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0
+; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
+; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
+; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
+; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0
+; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
+define void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
+ %no.md = fdiv <2 x float> %a, %b
+ store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
+
+ %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
+ store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
+
+ %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
+ store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out
+
+ %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
+ store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out
+
+ ret void
+}
+
+; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
+; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
+; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
+; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
+; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
+
+; CHECK: extractelement <2 x float> %x
+; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
+; CHECK: extractelement <2 x float> %x
+; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
+; CHECK: store volatile <2 x float> %arcp.25ulp
+
+; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
+; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
+; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
+define void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
+ %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
+ store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
+
+ %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
+ store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
+
+ %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x
+ store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out
+
+ %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
+ store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
+
+ %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
+ store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out
+
+ %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
+ store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
+
+ ret void
+}
+
+; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
+; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
+; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
+; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
+
+; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
+; CHECK: fdiv arcp float 1.000000e+00, %[[X0]], !fpmath !0
+; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
+; CHECK: fdiv arcp float 2.000000e+00, %[[X1]], !fpmath !0
+; CHECK: store volatile <2 x float> %arcp.25ulp
+
+; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
+; CHECK: fdiv fast float 1.000000e+00, %[[X0]], !fpmath !0
+; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
+; CHECK: fdiv fast float 2.000000e+00, %[[X1]], !fpmath !0
+; CHECK: store volatile <2 x float> %fast.25ulp
+define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
+ %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
+ store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
+
+ %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x
+ store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out
+
+ %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
+ store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
+
+ %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
+ store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out
+
+ %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
+ store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
+
+ ret void
+}
+
+; FIXME: Should be able to get fdiv for 1.0 component
+; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
+; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
+; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
+; CHECK: store volatile <2 x float> %arcp.25ulp
+
+; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
+; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
+; CHECK: store volatile <2 x float> %fast.25ulp
+define void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
+ %x.insert = insertelement <2 x float> %x, float 1.0, i32 0
+
+ %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
+ store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out
+
+ %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
+ store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
+
+ ret void
+}
+
+; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
+; CHECK: %no.md = fdiv float %a, %b{{$}}
+; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
+; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
+; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0
+; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
+; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
+; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
+define void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
+ %no.md = fdiv float %a, %b
+ store volatile float %no.md, float addrspace(1)* %out
+
+ %md.half.ulp = fdiv float %a, %b, !fpmath !1
+ store volatile float %md.half.ulp, float addrspace(1)* %out
+
+ %md.1ulp = fdiv float %a, %b, !fpmath !2
+ store volatile float %md.1ulp, float addrspace(1)* %out
+
+ %md.25ulp = fdiv float %a, %b, !fpmath !0
+ store volatile float %md.25ulp, float addrspace(1)* %out
+
+ %md.3ulp = fdiv float %a, %b, !fpmath !3
+ store volatile float %md.3ulp, float addrspace(1)* %out
+
+ %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
+ store volatile float %fast.md.25ulp, float addrspace(1)* %out
+
+ %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
+ store volatile float %arcp.md.25ulp, float addrspace(1)* %out
+
+ ret void
+}
+
+attributes #0 = { nounwind optnone noinline }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind "target-features"="+fp32-denormals" }
+
+; CHECK: !0 = !{float 2.500000e+00}
+; CHECK: !1 = !{float 5.000000e-01}
+; CHECK: !2 = !{float 1.000000e+00}
+; CHECK: !3 = !{float 3.000000e+00}
+
+!0 = !{float 2.500000e+00}
+!1 = !{float 5.000000e-01}
+!2 = !{float 1.000000e+00}
+!3 = !{float 3.000000e+00}
diff --git a/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
index 7b5158629091..bd0817d30413 100644
--- a/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
+++ b/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
@@ -417,12 +417,6 @@ entry:
ret void
}
-; HSAOPT: !0 = !{}
-; HSAOPT: !1 = !{i32 0, i32 2048}
-
-; NOHSAOPT: !0 = !{i32 0, i32 2048}
-
-
; FUNC-LABEL: v16i32_stack:
; R600: MOVA_INT
@@ -527,4 +521,33 @@ define void @v2float_stack(<2 x float> addrspace(1)* %out, i32 %a) {
ret void
}
+; OPT-LABEL: @direct_alloca_read_0xi32(
+; OPT: store [0 x i32] undef, [0 x i32] addrspace(3)*
+; OPT: load [0 x i32], [0 x i32] addrspace(3)*
+define void @direct_alloca_read_0xi32([0 x i32] addrspace(1)* %out, i32 %index) {
+entry:
+ %tmp = alloca [0 x i32]
+ store [0 x i32] [], [0 x i32]* %tmp
+ %load = load [0 x i32], [0 x i32]* %tmp
+ store [0 x i32] %load, [0 x i32] addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @direct_alloca_read_1xi32(
+; OPT: store [1 x i32] zeroinitializer, [1 x i32] addrspace(3)*
+; OPT: load [1 x i32], [1 x i32] addrspace(3)*
+define void @direct_alloca_read_1xi32([1 x i32] addrspace(1)* %out, i32 %index) {
+entry:
+ %tmp = alloca [1 x i32]
+ store [1 x i32] [i32 0], [1 x i32]* %tmp
+ %load = load [1 x i32], [1 x i32]* %tmp
+ store [1 x i32] %load, [1 x i32] addrspace(1)* %out
+ ret void
+}
+
attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="2" }
+
+; HSAOPT: !0 = !{}
+; HSAOPT: !1 = !{i32 0, i32 2048}
+
+; NOHSAOPT: !0 = !{i32 0, i32 2048}
diff --git a/test/CodeGen/AMDGPU/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll
index ff730a085255..00636240bc6c 100644
--- a/test/CodeGen/AMDGPU/basic-branch.ll
+++ b/test/CodeGen/AMDGPU/basic-branch.ll
@@ -6,7 +6,6 @@
; GCN-LABEL: {{^}}test_branch:
; GCNNOOPT: v_writelane_b32
; GCNNOOPT: v_writelane_b32
-; GCNNOOPT: v_writelane_b32
; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
; GCN: ; BB#1
diff --git a/test/CodeGen/AMDGPU/fdiv.ll b/test/CodeGen/AMDGPU/fdiv.ll
index 4021233e7785..65464cdba604 100644
--- a/test/CodeGen/AMDGPU/fdiv.ll
+++ b/test/CodeGen/AMDGPU/fdiv.ll
@@ -1,8 +1,4 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-fast-fdiv < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=UNSAFE-FP -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; These tests check that fdiv is expanded correctly and also test that the
@@ -15,22 +11,59 @@
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
+; SI: v_div_scale_f32
+; SI-DAG: v_div_scale_f32
; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_mul_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_div_fmas_f32
+; SI: v_div_fixup_f32
+define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+entry:
+ %fdiv = fdiv float %a, %b
+ store float %fdiv, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fdiv_25ulp_f32:
+; SI: v_cndmask_b32
+; SI: v_mul_f32
+; SI: v_rcp_f32
+; SI: v_mul_f32
+; SI: v_mul_f32
+define void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+entry:
+ %fdiv = fdiv float %a, %b, !fpmath !0
+ store float %fdiv, float addrspace(1)* %out
+ ret void
+}
+
+; Use correct fdiv
+; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32:
+; SI: v_fma_f32
+; SI: v_div_fmas_f32
+; SI: v_div_fixup_f32
+define void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
+entry:
+ %fdiv = fdiv float %a, %b, !fpmath !0
+ store float %fdiv, float addrspace(1)* %out
+ ret void
+}
-; I754-DAG: v_div_scale_f32
-; I754-DAG: v_rcp_f32
-; I754-DAG: v_fma_f32
-; I754-DAG: v_mul_f32
-; I754-DAG: v_fma_f32
-; I754-DAG: v_div_fixup_f32
-define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
+; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32:
+; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
+; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dword [[RESULT]]
+define void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
- %0 = fdiv float %a, %b
- store float %0, float addrspace(1)* %out
+ %fdiv = fdiv fast float %a, %b
+ store float %fdiv, float addrspace(1)* %out
ret void
}
@@ -38,15 +71,14 @@ entry:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) {
+; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
+; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dword [[RESULT]]
+define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
- %0 = fdiv fast float %a, %b
- store float %0, float addrspace(1)* %out
+ %fdiv = fdiv fast float %a, %b
+ store float %fdiv, float addrspace(1)* %out
ret void
}
@@ -54,15 +86,14 @@ entry:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) {
+; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
+; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dword [[RESULT]]
+define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
- %0 = fdiv arcp float %a, %b
- store float %0, float addrspace(1)* %out
+ %fdiv = fdiv arcp float %a, %b
+ store float %fdiv, float addrspace(1)* %out
ret void
}
@@ -72,26 +103,24 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
+; SI: v_div_scale_f32
+; SI: v_div_scale_f32
+; SI: v_div_scale_f32
+; SI: v_div_scale_f32
+define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+entry:
+ %fdiv = fdiv <2 x float> %a, %b
+ store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
+ ret void
+}
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32:
+; SI: v_cmp_gt_f32
+; SI: v_cmp_gt_f32
+define void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
- %0 = fdiv <2 x float> %a, %b
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+ %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0
+ store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
ret void
}
@@ -101,19 +130,12 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+; SI: v_rcp_f32
+; SI: v_rcp_f32
+define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
- %0 = fdiv fast <2 x float> %a, %b
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+ %fdiv = fdiv fast <2 x float> %a, %b
+ store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
ret void
}
@@ -123,19 +145,12 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+; SI: v_rcp_f32
+; SI: v_rcp_f32
+define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
- %0 = fdiv arcp <2 x float> %a, %b
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+ %fdiv = fdiv arcp <2 x float> %a, %b
+ store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
ret void
}
@@ -149,37 +164,11 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+; SI: v_div_fixup_f32
+; SI: v_div_fixup_f32
+; SI: v_div_fixup_f32
+; SI: v_div_fixup_f32
+define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
%b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float>, <4 x float> addrspace(1) * %in
%b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
@@ -198,24 +187,11 @@ define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+; SI: v_rcp_f32
+; SI: v_rcp_f32
+; SI: v_rcp_f32
+; SI: v_rcp_f32
+define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
%b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float>, <4 x float> addrspace(1) * %in
%b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
@@ -234,24 +210,11 @@ define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> ad
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+; SI: v_rcp_f32
+; SI: v_rcp_f32
+; SI: v_rcp_f32
+; SI: v_rcp_f32
+define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
%b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float>, <4 x float> addrspace(1) * %in
%b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
@@ -259,3 +222,9 @@ define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> ad
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
}
+
+attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
+attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
+attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
+
+!0 = !{float 2.500000e+00}
diff --git a/test/CodeGen/AMDGPU/fp_to_sint.f64.ll b/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
index be23e10d7087..1537d67cadcc 100644
--- a/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
+++ b/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
@@ -1,7 +1,8 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare double @llvm.fabs.f64(double) #1
; FUNC-LABEL: @fp_to_sint_f64_i32
; SI: v_cvt_i32_f64_e32
@@ -54,3 +55,23 @@ define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in
store i64 %cast, i64 addrspace(1)* %out, align 8
ret void
}
+
+; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1:
+; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}}
+define void @fp_to_sint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
+ %conv = fptosi double %in to i1
+ store i1 %conv, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1:
+; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}|
+define void @fp_to_sint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
+ %in.fabs = call double @llvm.fabs.f64(double %in)
+ %conv = fptosi double %in.fabs to i1
+ store i1 %conv, i1 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/fp_to_sint.ll b/test/CodeGen/AMDGPU/fp_to_sint.ll
index b39aeadc8cce..0cd0358bafd5 100644
--- a/test/CodeGen/AMDGPU/fp_to_sint.ll
+++ b/test/CodeGen/AMDGPU/fp_to_sint.ll
@@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-declare float @llvm.fabs.f32(float) #0
+declare float @llvm.fabs.f32(float) #1
; FUNC-LABEL: {{^}}fp_to_sint_i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
@@ -17,7 +17,7 @@ define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) {
; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs:
; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) {
- %in.fabs = call float @llvm.fabs.f32(float %in) #0
+ %in.fabs = call float @llvm.fabs.f32(float %in)
%conv = fptosi float %in.fabs to i32
store i32 %conv, i32 addrspace(1)* %out
ret void
@@ -227,4 +227,26 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
ret void
}
-attributes #0 = { nounwind readnone }
+; FUNC-LABEL: {{^}}fp_to_sint_f32_to_i1:
+; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}}
+
+; EG: AND_INT
+; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, literal.y,
+; EG-NEXT: -1082130432(-1.000000e+00)
+define void @fp_to_sint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
+  %conv = fptosi float %in to i1
+  store i1 %conv, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fp_to_sint_fabs_f32_to_i1:
+; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{[0-9]+}}|
+define void @fp_to_sint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
+  %in.fabs = call float @llvm.fabs.f32(float %in)
+  %conv = fptosi float %in.fabs to i1
+  store i1 %conv, i1 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/fp_to_uint.f64.ll b/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
index 760019ebdc08..d5bc416434df 100644
--- a/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
+++ b/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
@@ -1,7 +1,8 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare double @llvm.fabs.f64(double) #1
; SI-LABEL: {{^}}fp_to_uint_i32_f64:
; SI: v_cvt_u32_f64_e32
@@ -68,3 +69,23 @@ define void @fp_to_uint_v4i64_v4f64(<4 x i64> addrspace(1)* %out, <4 x double> %
store <4 x i64> %cast, <4 x i64> addrspace(1)* %out, align 32
ret void
}
+
+; FUNC-LABEL: {{^}}fp_to_uint_f64_to_i1:
+; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{\[[0-9]+:[0-9]+\]}}
+define void @fp_to_uint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
+ %conv = fptoui double %in to i1
+ store i1 %conv, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fp_to_uint_fabs_f64_to_i1:
+; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{\[[0-9]+:[0-9]+\]}}|
+define void @fp_to_uint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
+ %in.fabs = call double @llvm.fabs.f64(double %in)
+ %conv = fptoui double %in.fabs to i1
+ store i1 %conv, i1 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/fp_to_uint.ll b/test/CodeGen/AMDGPU/fp_to_uint.ll
index b7b6ccc238b3..8a0f9fa2ac2b 100644
--- a/test/CodeGen/AMDGPU/fp_to_uint.ll
+++ b/test/CodeGen/AMDGPU/fp_to_uint.ll
@@ -1,6 +1,8 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC
+
+declare float @llvm.fabs.f32(float) #1
; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
@@ -215,3 +217,27 @@ define void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float>
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
ret void
}
+
+
+; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1:
+; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
+
+; EG: AND_INT
+; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, 1.0,
+define void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
+ %conv = fptoui float %in to i1
+ store i1 %conv, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1:
+; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{[0-9]+}}|
+define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
+ %in.fabs = call float @llvm.fabs.f32(float %in)
+ %conv = fptoui float %in.fabs to i1
+ store i1 %conv, i1 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
new file mode 100644
index 000000000000..4e17a921d91b
--- /dev/null
+++ b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
@@ -0,0 +1,8 @@
+; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+; check llc does not crash for invalid opencl version metadata
+
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .short 256
+
+!opencl.ocl.version = !{}
diff --git a/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
new file mode 100644
index 000000000000..35b7d70596c1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+; check llc does not crash for invalid opencl version metadata
+
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .short 256
+
+!opencl.ocl.version = !{!0}
+!0 = !{}
diff --git a/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
new file mode 100644
index 000000000000..e1693551b621
--- /dev/null
+++ b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+; check llc does not crash for invalid opencl version metadata
+
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .short 256
+
+!opencl.ocl.version = !{!0}
+!0 = !{i32 1}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
new file mode 100644
index 000000000000..54d7848da3bf
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+
+declare float @llvm.amdgcn.fdiv.fast(float, float) #0
+
+; CHECK-LABEL: {{^}}test_fdiv_fast:
+; CHECK: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
+; CHECK: v_mul_f32_e32
+; CHECK: v_rcp_f32_e32
+; CHECK: v_mul_f32_e32
+; CHECK: v_mul_f32_e32
+define void @test_fdiv_fast(float addrspace(1)* %out, float %a, float %b) #1 {
+ %fdiv = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)
+ store float %fdiv, float addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticgroup.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
index cf6d1ab237cd..6014e2ed85f8 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticgroup.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
@@ -2,13 +2,14 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
-
@lds0 = addrspace(3) global [512 x float] undef, align 4
@lds1 = addrspace(3) global [256 x float] undef, align 4
-; FUNC-LABEL: {{^}}groupstaticsize_test0:
-; CHECK: s_movk_i32 s{{[0-9]+}}, 0x800
-define void @get_groupstaticsize_test0(float addrspace(1)* %out, i32 addrspace(1)* %lds_size) #0 {
+@large = addrspace(3) global [4096 x i32] undef, align 4
+
+; CHECK-LABEL: {{^}}groupstaticsize_test0:
+; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0x800{{$}}
+define void @groupstaticsize_test0(float addrspace(1)* %out, i32 addrspace(1)* %lds_size) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 64
%static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1
@@ -20,9 +21,8 @@ define void @get_groupstaticsize_test0(float addrspace(1)* %out, i32 addrspace(1
ret void
}
-
-; FUNC-LABEL: {{^}}groupstaticsize_test1:
-; CHECK: s_movk_i32 s{{[0-9]+}}, 0xc00
+; CHECK-LABEL: {{^}}groupstaticsize_test1:
+; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xc00{{$}}
define void @groupstaticsize_test1(float addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %lds_size) {
entry:
%static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1
@@ -48,6 +48,16 @@ endif: ; preds = %else, %if
ret void
}
+; Exceeds 16-bit simm limit of s_movk_i32
+; CHECK-LABEL: {{^}}large_groupstaticsize:
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}}
+define void @large_groupstaticsize(i32 addrspace(1)* %size, i32 %idx) #0 {
+ %gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(3)* @large, i32 0, i32 %idx
+ store volatile i32 0, i32 addrspace(3)* %gep
+ %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize()
+ store i32 %static_lds_size, i32 addrspace(1)* %size
+ ret void
+}
declare i32 @llvm.amdgcn.groupstaticsize() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/test/CodeGen/AMDGPU/rcp-pattern.ll b/test/CodeGen/AMDGPU/rcp-pattern.ll
index b1d422062543..27a88f7b59e7 100644
--- a/test/CodeGen/AMDGPU/rcp-pattern.ll
+++ b/test/CodeGen/AMDGPU/rcp-pattern.ll
@@ -1,11 +1,96 @@
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FIXME: Evergreen only ever does unsafe fp math.
; FUNC-LABEL: {{^}}rcp_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
; EG: RECIP_IEEE
-define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
%rcp = fdiv float 1.0, %src
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}rcp_ulp25_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %rcp = fdiv float 1.0, %src, !fpmath !0
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rcp_fast_ulp25_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %rcp = fdiv fast float 1.0, %src, !fpmath !0
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rcp_arcp_ulp25_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %rcp = fdiv arcp float 1.0, %src, !fpmath !0
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rcp_global_fast_ulp25_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
+ %rcp = fdiv float 1.0, %src, !fpmath !0
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rcp_fabs_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], |[[SRC]]|
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %src.fabs = call float @llvm.fabs.f32(float %src)
+ %rcp = fdiv float 1.0, %src.fabs
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FIXME: fneg folded into constant 1
+; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_f32:
+define void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %src.fabs = call float @llvm.fabs.f32(float %src)
+ %src.fabs.fneg = fsub float -0.0, %src.fabs
+ %rcp = fdiv float 1.0, %src.fabs.fneg
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+
+declare float @llvm.fabs.f32(float) #1
+
+attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "unsafe-fp-math"="true" }
+
+!0 = !{float 2.500000e+00}
diff --git a/test/CodeGen/AMDGPU/reciprocal.ll b/test/CodeGen/AMDGPU/reciprocal.ll
deleted file mode 100644
index f9292a788521..000000000000
--- a/test/CodeGen/AMDGPU/reciprocal.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define amdgpu_ps void @test(<4 x float> inreg %reg0) {
- %r0 = extractelement <4 x float> %reg0, i32 0
- %r1 = fdiv float 1.0, %r0
- %vec = insertelement <4 x float> undef, float %r1, i32 0
- call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
- ret void
-}
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll
index 10187f6125d6..4ba4ac76a280 100644
--- a/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -348,7 +348,6 @@ bb7: ; preds = %bb4
; CHECK: image_sample_c
; CHECK: v_cmp_neq_f32_e32 vcc, 0,
-; CHECK: s_and_b64 exec, exec,
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
; CHECK: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
@@ -385,6 +384,7 @@ bb9: ; preds = %bb4
declare void @llvm.AMDGPU.kill(float) #0
declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone } \ No newline at end of file
diff --git a/test/CodeGen/AMDGPU/vector-alloca.ll b/test/CodeGen/AMDGPU/vector-alloca.ll
index c151ca9ef9b4..7dcf36f144ac 100644
--- a/test/CodeGen/AMDGPU/vector-alloca.ll
+++ b/test/CodeGen/AMDGPU/vector-alloca.ll
@@ -3,6 +3,11 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
+
+; OPT-LABEL: @vector_read(
+; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
+; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}vector_read:
; EG: MOV
@@ -12,21 +17,26 @@
; EG: MOVA_INT
define void @vector_read(i32 addrspace(1)* %out, i32 %index) {
entry:
- %0 = alloca [4 x i32]
- %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0
- %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
- %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2
- %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3
+ %tmp = alloca [4 x i32]
+ %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
store i32 0, i32* %x
store i32 1, i32* %y
store i32 2, i32* %z
store i32 3, i32* %w
- %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %index
- %2 = load i32, i32* %1
- store i32 %2, i32 addrspace(1)* %out
+ %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
+ %tmp2 = load i32, i32* %tmp1
+ store i32 %tmp2, i32 addrspace(1)* %out
ret void
}
+; OPT-LABEL: @vector_write(
+; OPT: %0 = insertelement <4 x i32> zeroinitializer, i32 1, i32 %w_index
+; OPT: %1 = extractelement <4 x i32> %0, i32 %r_index
+; OPT: store i32 %1, i32 addrspace(1)* %out, align 4
+
; FUNC-LABEL: {{^}}vector_write:
; EG: MOV
; EG: MOV
@@ -36,42 +46,95 @@ entry:
; EG: MOVA_INT
define void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
- %0 = alloca [4 x i32]
- %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0
- %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
- %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2
- %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3
+ %tmp = alloca [4 x i32]
+ %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
store i32 0, i32* %x
store i32 0, i32* %y
store i32 0, i32* %z
store i32 0, i32* %w
- %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %w_index
- store i32 1, i32* %1
- %2 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %r_index
- %3 = load i32, i32* %2
- store i32 %3, i32 addrspace(1)* %out
+ %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %w_index
+ store i32 1, i32* %tmp1
+ %tmp2 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %r_index
+ %tmp3 = load i32, i32* %tmp2
+ store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
; This test should be optimize to:
; store i32 0, i32 addrspace(1)* %out
+
+; OPT-LABEL: @bitcast_gep(
+; OPT: store i32 0, i32 addrspace(1)* %out, align 4
+
; FUNC-LABEL: {{^}}bitcast_gep:
; EG: STORE_RAW
define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
- %0 = alloca [4 x i32]
- %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0
- %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
- %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2
- %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3
+ %tmp = alloca [4 x i32]
+ %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
store i32 0, i32* %x
store i32 0, i32* %y
store i32 0, i32* %z
store i32 0, i32* %w
- %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
- %2 = bitcast i32* %1 to [4 x i32]*
- %3 = getelementptr [4 x i32], [4 x i32]* %2, i32 0, i32 0
- %4 = load i32, i32* %3
- store i32 %4, i32 addrspace(1)* %out
+ %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
+ %tmp2 = bitcast i32* %tmp1 to [4 x i32]*
+ %tmp3 = getelementptr [4 x i32], [4 x i32]* %tmp2, i32 0, i32 0
+ %tmp4 = load i32, i32* %tmp3
+ store i32 %tmp4, i32 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @vector_read_bitcast_gep(
+; OPT: %0 = extractelement <4 x i32> <i32 1065353216, i32 1, i32 2, i32 3>, i32 %index
+; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
+define void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) {
+entry:
+ %tmp = alloca [4 x i32]
+ %x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
+ %y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
+ %z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
+ %w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
+ %bc = bitcast i32* %x to float*
+ store float 1.0, float* %bc
+ store i32 1, i32* %y
+ store i32 2, i32* %z
+ store i32 3, i32* %w
+ %tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
+ %tmp2 = load i32, i32* %tmp1
+ store i32 %tmp2, i32 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: Should be able to promote this. Instcombine should fold the
+; cast in the hasOneUse case so it might not matter in practice
+
+; OPT-LABEL: @vector_read_bitcast_alloca(
+; OPT: alloca [4 x float]
+; OPT: store float
+; OPT: store float
+; OPT: store float
+; OPT: store float
+; OPT: load float
+define void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) {
+entry:
+ %tmp = alloca [4 x i32]
+ %tmp.bc = bitcast [4 x i32]* %tmp to [4 x float]*
+ %x = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 0
+ %y = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 1
+ %z = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 2
+ %w = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 3
+ store float 0.0, float* %x
+ store float 1.0, float* %y
+ store float 2.0, float* %z
+ store float 4.0, float* %w
+ %tmp1 = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 %index
+ %tmp2 = load float, float* %tmp1
+ store float %tmp2, float addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll
index 23b0ffd5b3da..809a7ba9b826 100644
--- a/test/CodeGen/AMDGPU/wqm.ll
+++ b/test/CodeGen/AMDGPU/wqm.ll
@@ -41,14 +41,14 @@ main_body:
;CHECK: store
;CHECK-NOT: exec
;CHECK: .size test3
-define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
+define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <4 x i32> %c) {
main_body:
%tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tex.1 = bitcast <4 x float> %tex to <4 x i32>
%tex.2 = extractelement <4 x i32> %tex.1, i32 0
- %gep = getelementptr float, float addrspace(1)* %ptr, i32 %tex.2
- %wr = extractelement <4 x float> %tex, i32 1
- store float %wr, float addrspace(1)* %gep
+
+ call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %tex, <4 x i32> undef, i32 %tex.2, i32 0, i1 0, i1 0)
+
ret <4 x float> %tex
}
@@ -66,8 +66,9 @@ main_body:
define amdgpu_ps <4 x float> @test4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %d, float %data) {
main_body:
%c.1 = mul i32 %c, %d
- %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.1
- store float %data, float addrspace(1)* %gep
+
+ call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> undef, <4 x i32> undef, i32 %c.1, i32 0, i1 0, i1 0)
+
%tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
ret <4 x float> %tex
}
@@ -89,7 +90,7 @@ main_body:
;CHECK: s_mov_b64 exec, [[SAVED]]
;CHECK: %IF
;CHECK: image_sample
-define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %z, float %data) {
+define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) {
main_body:
%cmp = icmp eq i32 %z, 0
br i1 %cmp, label %IF, label %ELSE
@@ -100,8 +101,7 @@ IF:
br label %END
ELSE:
- %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c
- store float %data, float addrspace(1)* %gep
+ call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
br label %END
END:
@@ -129,7 +129,7 @@ END:
;CHECK: s_or_b64 exec, exec,
;CHECK: v_mov_b32_e32 v0
;CHECK: ; return
-define amdgpu_ps float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %z, float %data) {
+define amdgpu_ps float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) {
main_body:
%cmp = icmp eq i32 %z, 0
br i1 %cmp, label %ELSE, label %IF
@@ -140,8 +140,7 @@ IF:
br label %END
ELSE:
- %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c
- store float %data, float addrspace(1)* %gep
+ call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
br label %END
END:
@@ -163,23 +162,20 @@ END:
;CHECK: store
;CHECK: s_wqm_b64 exec, exec
;CHECK: v_cmp
-define amdgpu_ps <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <3 x i32> %idx, <2 x float> %data, i32 %coord) {
+define amdgpu_ps <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <3 x i32> %idx, <2 x float> %data, i32 %coord) {
main_body:
%idx.1 = extractelement <3 x i32> %idx, i32 0
- %gep.1 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.1
%data.1 = extractelement <2 x float> %data, i32 0
- store float %data.1, float addrspace(1)* %gep.1
+ call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0)
; The load that determines the branch (and should therefore be WQM) is
; surrounded by stores that require disabled WQM.
%idx.2 = extractelement <3 x i32> %idx, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.2
- %z = load float, float addrspace(1)* %gep.2
+ %z = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx.2, i32 0, i1 0, i1 0)
%idx.3 = extractelement <3 x i32> %idx, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.3
%data.3 = extractelement <2 x float> %data, i32 1
- store float %data.3, float addrspace(1)* %gep.3
+ call void @llvm.amdgcn.buffer.store.f32(float %data.3, <4 x i32> undef, i32 %idx.3, i32 0, i1 0, i1 0)
%cc = fcmp ogt float %z, 0.0
br i1 %cc, label %IF, label %ELSE
@@ -210,24 +206,21 @@ END:
;CHECK: load
;CHECK: store
;CHECK: v_cmp
-define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <3 x i32> %idx, <2 x float> %data, i32 %coord) {
+define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <3 x i32> %idx, <2 x float> %data, i32 %coord) {
main_body:
%tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tex.1 = extractelement <4 x float> %tex, i32 0
%idx.1 = extractelement <3 x i32> %idx, i32 0
- %gep.1 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.1
%data.1 = extractelement <2 x float> %data, i32 0
- store float %data.1, float addrspace(1)* %gep.1
+ call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0)
%idx.2 = extractelement <3 x i32> %idx, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.2
- %z = load float, float addrspace(1)* %gep.2
+ %z = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx.2, i32 0, i1 0, i1 0)
%idx.3 = extractelement <3 x i32> %idx, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.3
%data.3 = extractelement <2 x float> %data, i32 1
- store float %data.3, float addrspace(1)* %gep.3
+ call void @llvm.amdgcn.buffer.store.f32(float %data.3, <4 x i32> undef, i32 %idx.3, i32 0, i1 0, i1 0)
%cc = fcmp ogt float %z, 0.0
br i1 %cc, label %IF, label %ELSE
@@ -258,15 +251,14 @@ END:
;CHECK: s_mov_b64 exec, [[SAVE]]
;CHECK: %END
;CHECK: image_sample
-define amdgpu_ps <4 x float> @test_control_flow_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %coord, i32 %y, float %z) {
+define amdgpu_ps <4 x float> @test_control_flow_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %coord, i32 %y, float %z) {
main_body:
%cond = icmp eq i32 %y, 0
br i1 %cond, label %IF, label %END
IF:
- %data = load float, float addrspace(1)* %ptr
- %gep = getelementptr float, float addrspace(1)* %ptr, i32 1
- store float %data, float addrspace(1)* %gep
+ %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
+ call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0)
br label %END
END:
@@ -282,13 +274,11 @@ END:
;CHECK-NEXT: s_wqm_b64 exec, exec
;CHECK: image_sample
;CHECK: s_and_b64 exec, exec, [[ORIG]]
-;SI: buffer_store_dword
-;VI: flat_store_dword
+;CHECK: buffer_store_dword
;CHECK: s_wqm_b64 exec, exec
;CHECK: v_cmpx_
;CHECK: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[ORIG]]
-;SI: buffer_store_dword
-;VI: flat_store_dword
+;CHECK: buffer_store_dword
;CHECK: s_mov_b64 exec, [[SAVE]]
;CHECK: image_sample
define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, i32 %coord, i32 %coord2, float %z) {
@@ -296,16 +286,14 @@ main_body:
%tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%idx.0 = extractelement <2 x i32> %idx, i32 0
- %gep.0 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.0
%data.0 = extractelement <2 x float> %data, i32 0
- store float %data.0, float addrspace(1)* %gep.0
+ call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0)
call void @llvm.AMDGPU.kill(float %z)
%idx.1 = extractelement <2 x i32> %idx, i32 1
- %gep.1 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.1
%data.1 = extractelement <2 x float> %data, i32 1
- store float %data.1, float addrspace(1)* %gep.1
+ call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0)
%tex2 = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%out = fadd <4 x float> %tex, %tex2
@@ -321,16 +309,14 @@ main_body:
; CHECK: s_wqm_b64 exec, exec
; CHECK: image_sample
; CHECK: s_and_b64 exec, exec, [[ORIG]]
-; SI: buffer_store_dword
-; VI: flat_store_dword
+; CHECK: buffer_store_dword
; CHECK-NOT: wqm
; CHECK: v_cmpx_
-define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %idx, float %data, i32 %coord, i32 %coord2, float %z) {
+define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, i32 %coord, i32 %coord2, float %z) {
main_body:
%tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %gep = getelementptr float, float addrspace(1)* %ptr, i32 %idx
- store float %data, float addrspace(1)* %gep
+ call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
call void @llvm.AMDGPU.kill(float %z)
@@ -350,9 +336,91 @@ main_body:
ret float %s
}
+; CHECK-LABEL: {{^}}test_loop_vcc:
+; CHECK-NEXT: ; %entry
+; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
+; CHECK: s_wqm_b64 exec, exec
+; CHECK: s_and_b64 exec, exec, [[LIVE]]
+; CHECK: image_store
+; CHECK: s_wqm_b64 exec, exec
+; CHECK: v_mov_b32_e32 [[CTR:v[0-9]+]], -2
+; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]]
+
+; CHECK: [[LOOPHDR]]: ; %loop
+; CHECK: v_add_i32_e32 [[CTR]], vcc, 2, [[CTR]]
+; CHECK: v_cmp_lt_i32_e32 vcc, 7, [[CTR]]
+; CHECK: s_cbranch_vccz
+; CHECK: ; %break
+
+; CHECK: ; return
+define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
+entry:
+ call void @llvm.amdgcn.image.store.v4i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
+ br label %loop
+
+loop:
+ %ctr.iv = phi i32 [ 0, %entry ], [ %ctr.next, %body ]
+ %c.iv = phi <4 x float> [ %in, %entry ], [ %c.next, %body ]
+ %cc = icmp sgt i32 %ctr.iv, 7
+ br i1 %cc, label %break, label %body
+
+body:
+ %c.i = bitcast <4 x float> %c.iv to <4 x i32>
+ %c.next = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %ctr.next = add i32 %ctr.iv, 2
+ br label %loop
+
+break:
+ ret <4 x float> %c.iv
+}
+
+; Only intrinsic stores need exact execution -- other stores do not have
+; externally visible effects and may require WQM for correctness.
+;
+; CHECK-LABEL: {{^}}test_alloca:
+; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
+; CHECK: s_wqm_b64 exec, exec
+
+; CHECK: s_and_b64 exec, exec, [[LIVE]]
+; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0
+; CHECK: s_wqm_b64 exec, exec
+; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
+; CHECK: s_and_b64 exec, exec, [[LIVE]]
+; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen
+; CHECK: s_wqm_b64 exec, exec
+; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
+
+; CHECK: image_sample
+; CHECK: s_and_b64 exec, exec, [[LIVE]]
+; CHECK: buffer_store_dwordx4
+define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
+entry:
+ %array = alloca [32 x i32], align 4
+
+ call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
+
+ %s.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 0
+ store volatile i32 %a, i32* %s.gep, align 4
+
+ call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0)
+
+ %c.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 %idx
+ %c = load i32, i32* %c.gep, align 4
+
+ %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+
+ call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
+
+ ret void
+}
+
+
declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
+declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
+declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2
declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 151cc1b12ed2..04eae8f9afec 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -49,7 +49,7 @@ tailrecurse.switch: ; preds = %tailrecurse
; V8-NEXT: beq
; V8-NEXT: %tailrecurse.switch
; V8: cmp
-; V8-NEXT: beq
+; V8-NEXT: bne
; V8-NEXT: b
; The trailing space in the last line checks that the branch is unconditional
switch i32 %and, label %sw.epilog [
diff --git a/test/CodeGen/ARM/ssat-v4t.ll b/test/CodeGen/ARM/ssat-v4t.ll
new file mode 100644
index 000000000000..3d74c88da827
--- /dev/null
+++ b/test/CodeGen/ARM/ssat-v4t.ll
@@ -0,0 +1,9 @@
+; RUN: not llc -O1 -mtriple=armv4t-none-none-eabi %s -o - 2>&1 | FileCheck %s
+
+; CHECK: Cannot select: intrinsic %llvm.arm.ssat
+define i32 @ssat() nounwind {
+ %tmp = call i32 @llvm.arm.ssat(i32 128, i32 1)
+ ret i32 %tmp
+}
+
+declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone
diff --git a/test/CodeGen/ARM/ssat.ll b/test/CodeGen/ARM/ssat.ll
index 2b75bc410aa8..f1e11dd33d1f 100644
--- a/test/CodeGen/ARM/ssat.ll
+++ b/test/CodeGen/ARM/ssat.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv4t-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V4T
+; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V6T2
; Check for several conditions that should result in SSAT.
; For example, the base test is equivalent to
@@ -16,7 +17,8 @@
; 32-bit base test
define i32 @sat_base_32bit(i32 %x) #0 {
; CHECK-LABEL: sat_base_32bit:
-; CHECK: ssat r0, #24, r0
+; V6T2: ssat r0, #24, r0
+; V4T-NOT: ssat
entry:
%cmpLow = icmp slt i32 %x, -8388608
%cmpUp = icmp sgt i32 %x, 8388607
@@ -29,7 +31,8 @@ entry:
; 16-bit base test
define i16 @sat_base_16bit(i16 %x) #0 {
; CHECK-LABEL: sat_base_16bit:
-; CHECK: ssat r0, #12, r0
+; V6T2: ssat r0, #12, r0
+; V4T-NOT: ssat
entry:
%cmpLow = icmp slt i16 %x, -2048
%cmpUp = icmp sgt i16 %x, 2047
@@ -42,7 +45,8 @@ entry:
; 8-bit base test
define i8 @sat_base_8bit(i8 %x) #0 {
; CHECK-LABEL: sat_base_8bit:
-; CHECK: ssat r0, #6, r0
+; V6T2: ssat r0, #6, r0
+; V4T-NOT: ssat
entry:
%cmpLow = icmp slt i8 %x, -32
%cmpUp = icmp sgt i8 %x, 31
@@ -60,7 +64,8 @@ entry:
; x < -k ? -k : (x < k ? x : k)
define i32 @sat_lower_upper_1(i32 %x) #0 {
; CHECK-LABEL: sat_lower_upper_1:
-; CHECK: ssat r0, #24, r0
+; V6T2: ssat r0, #24, r0
+; V4T-NOT: ssat
entry:
%cmpLow = icmp slt i32 %x, -8388608
%cmpUp = icmp slt i32 %x, 8388607
@@ -72,7 +77,8 @@ entry:
; x > -k ? (x > k ? k : x) : -k
define i32 @sat_lower_upper_2(i32 %x) #0 {
; CHECK-LABEL: sat_lower_upper_2:
-; CHECK: ssat r0, #24, r0
+; V6T2: ssat r0, #24, r0
+; V4T-NOT: ssat
entry:
%cmpLow = icmp sgt i32 %x, -8388608
%cmpUp = icmp sgt i32 %x, 8388607
@@ -84,7 +90,8 @@ entry:
; x < k ? (x < -k ? -k : x) : k
define i32 @sat_upper_lower_1(i32 %x) #0 {
; CHECK-LABEL: sat_upper_lower_1:
-; CHECK: ssat r0, #24, r0
+; V6T2: ssat r0, #24, r0
+; V4T-NOT: ssat
entry:
%cmpUp = icmp slt i32 %x, 8388607
%cmpLow = icmp slt i32 %x, -8388608
@@ -96,7 +103,8 @@ entry:
; x > k ? k : (x < -k ? -k : x)
define i32 @sat_upper_lower_2(i32 %x) #0 {
; CHECK-LABEL: sat_upper_lower_2:
-; CHECK: ssat r0, #24, r0
+; V6T2: ssat r0, #24, r0
+; V4T-NOT: ssat
entry:
%cmpUp = icmp sgt i32 %x, 8388607
%cmpLow = icmp slt i32 %x, -8388608
@@ -108,7 +116,8 @@ entry:
; k < x ? k : (x > -k ? x : -k)
define i32 @sat_upper_lower_3(i32 %x) #0 {
; CHECK-LABEL: sat_upper_lower_3:
-; CHECK: ssat r0, #24, r0
+; V6T2: ssat r0, #24, r0
+; V4T-NOT: ssat
entry:
%cmpUp = icmp slt i32 8388607, %x
%cmpLow = icmp sgt i32 %x, -8388608
@@ -125,7 +134,8 @@ entry:
; k <= x ? k : (x >= -k ? x : -k)
define i32 @sat_le_ge(i32 %x) #0 {
; CHECK-LABEL: sat_le_ge:
-; CHECK: ssat r0, #24, r0
+; V6T2: ssat r0, #24, r0
+; V4T-NOT: ssat
entry:
%cmpUp = icmp sle i32 8388607, %x
%cmpLow = icmp sge i32 %x, -8388608
diff --git a/test/CodeGen/ARM/usat-v4t.ll b/test/CodeGen/ARM/usat-v4t.ll
new file mode 100644
index 000000000000..572c760e3ae6
--- /dev/null
+++ b/test/CodeGen/ARM/usat-v4t.ll
@@ -0,0 +1,9 @@
+; RUN: not llc -O1 -mtriple=armv4t-none-none-eabi %s -o - 2>&1 | FileCheck %s
+
+; CHECK: LLVM ERROR: Cannot select: intrinsic %llvm.arm.usat
+define i32 @usat1() nounwind {
+ %tmp = call i32 @llvm.arm.usat(i32 128, i32 31)
+ ret i32 %tmp
+}
+
+declare i32 @llvm.arm.usat(i32, i32) nounwind readnone
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
index f736ddd0def6..c0229c626a0e 100644
--- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -11,13 +11,13 @@ entry:
; PIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
; STATIC-O32: lui $[[R0:[0-9]+]], %hi($CPI0_0)
; STATIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
-; PIC-N32: lw $[[R0:[0-9]+]], %got_page($CPI0_0)
-; PIC-N32: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
-; STATIC-N32: lui $[[R0:[0-9]+]], %hi($CPI0_0)
-; STATIC-N32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
-; PIC-N64: ld $[[R0:[0-9]+]], %got_page($CPI0_0)
-; PIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
-; STATIC-N64: ld $[[R0:[0-9]+]], %got_page($CPI0_0)
-; STATIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
+; PIC-N32: lw $[[R0:[0-9]+]], %got_page(.LCPI0_0)
+; PIC-N32: lwc1 $f0, %got_ofst(.LCPI0_0)($[[R0]])
+; STATIC-N32: lui $[[R0:[0-9]+]], %hi(.LCPI0_0)
+; STATIC-N32: lwc1 $f0, %lo(.LCPI0_0)($[[R0]])
+; PIC-N64: ld $[[R0:[0-9]+]], %got_page(.LCPI0_0)
+; PIC-N64: lwc1 $f0, %got_ofst(.LCPI0_0)($[[R0]])
+; STATIC-N64: ld $[[R0:[0-9]+]], %got_page(.LCPI0_0)
+; STATIC-N64: lwc1 $f0, %got_ofst(.LCPI0_0)($[[R0]])
ret float 0x400B333340000000
}
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 7d66d1a1a204..5f0a0a5a4929 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -27,9 +27,9 @@ entry:
; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
; PIC-O32: jr $[[R5]]
; N64: dsll $[[R0:[0-9]+]], ${{[0-9]+}}, 3
-; N64: ld $[[R1:[0-9]+]], %got_page($JTI0_0)
+; N64: ld $[[R1:[0-9]+]], %got_page(.LJTI0_0)
; N64: daddu $[[R2:[0-9]+]], $[[R0:[0-9]+]], $[[R1]]
-; N64: ld $[[R4:[0-9]+]], %got_ofst($JTI0_0)($[[R2]])
+; N64: ld $[[R4:[0-9]+]], %got_ofst(.LJTI0_0)($[[R2]])
; N64: daddu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
; N64: jr $[[R5]]
switch i32 %0, label %bb4 [
@@ -68,7 +68,7 @@ bb5: ; preds = %entry
; PIC-O32: .gpword
; PIC-O32: .gpword
; N64: .p2align 3
-; N64: $JTI0_0:
+; N64: .LJTI0_0:
; N64: .gpdword
; N64: .gpdword
; N64: .gpdword
diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll
index 377fe9327e0e..62150875e75b 100644
--- a/test/CodeGen/Mips/analyzebranch.ll
+++ b/test/CodeGen/Mips/analyzebranch.ll
@@ -10,7 +10,7 @@ define double @foo(double %a, double %b) nounwind readnone {
entry:
; ALL-LABEL: foo:
-; FCC: bc1f $BB
+; FCC: bc1f {{\$|\.L}}BB
; FCC: nop
; 32-GPR: mtc1 $zero, $[[Z:f[0-9]]]
@@ -19,7 +19,7 @@ entry:
; GPR: cmp.lt.d $[[FGRCC:f[0-9]+]], $[[Z]], $f12
; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC]]
; GPR-NOT: not $[[GPRCC]], $[[GPRCC]]
-; GPR: bnezc $[[GPRCC]], $BB
+; GPR: bnezc $[[GPRCC]], {{\$|\.L}}BB
%cmp = fcmp ogt double %a, 0.000000e+00
br i1 %cmp, label %if.end6, label %if.else
@@ -43,7 +43,7 @@ define void @f1(float %f) nounwind {
entry:
; ALL-LABEL: f1:
-; FCC: bc1f $BB
+; FCC: bc1f {{\$|\.L}}BB
; FCC: nop
; GPR: mtc1 $zero, $[[Z:f[0-9]]]
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index 8f4ccb19958a..dfba8ba19331 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -34,17 +34,17 @@ entry:
; MIPS32-ANY: lw $[[R0:[0-9]+]], %got(x)
; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x)(
-; O0: $[[BB0:[A-Z_0-9]+]]:
+; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; O0: ld $[[R1:[0-9]+]]
; O0-NEXT: ll $[[R2:[0-9]+]], 0($[[R1]])
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R3:[0-9]+]], 0($[[R0]])
; ALL: addu $[[R4:[0-9]+]], $[[R3]], $4
; ALL: sc $[[R4]], 0($[[R0]])
-; NOT-MICROMIPS: beqz $[[R4]], $[[BB0]]
-; MICROMIPS: beqzc $[[R4]], $[[BB0]]
-; MIPSR6: beqzc $[[R4]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R4]], [[BB0]]
+; MICROMIPS: beqzc $[[R4]], [[BB0]]
+; MIPSR6: beqzc $[[R4]], [[BB0]]
}
define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
@@ -59,14 +59,14 @@ entry:
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R1:[0-9]+]], 0($[[R0]])
; ALL: and $[[R3:[0-9]+]], $[[R1]], $4
; ALL: nor $[[R2:[0-9]+]], $zero, $[[R3]]
; ALL: sc $[[R2]], 0($[[R0]])
-; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]]
-; MICROMIPS: beqzc $[[R2]], $[[BB0]]
-; MIPSR6: beqzc $[[R2]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R2]], [[BB0]]
+; MICROMIPS: beqzc $[[R2]], [[BB0]]
+; MIPSR6: beqzc $[[R2]], [[BB0]]
}
define i32 @AtomicSwap32(i32 signext %newval) nounwind {
@@ -82,12 +82,12 @@ entry:
; MIPS32-ANY: lw $[[R0:[0-9]+]], %got(x)
; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x)
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll ${{[0-9]+}}, 0($[[R0]])
; ALL: sc $[[R2:[0-9]+]], 0($[[R0]])
-; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]]
-; MICROMIPS: beqzc $[[R2]], $[[BB0]]
-; MIPSR6: beqzc $[[R2]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R2]], [[BB0]]
+; MICROMIPS: beqzc $[[R2]], [[BB0]]
+; MIPSR6: beqzc $[[R2]], [[BB0]]
}
define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
@@ -104,16 +104,16 @@ entry:
; MIPS32-ANY: lw $[[R0:[0-9]+]], %got(x)
; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x)(
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $2, 0($[[R0]])
-; NOT-MICROMIPS: bne $2, $4, $[[BB1:[A-Z_0-9]+]]
-; MICROMIPS: bne $2, $4, $[[BB1:[A-Z_0-9]+]]
-; MIPSR6: bnec $2, $4, $[[BB1:[A-Z_0-9]+]]
+; NOT-MICROMIPS: bne $2, $4, [[BB1:(\$|\.L)[A-Z_0-9]+]]
+; MICROMIPS: bne $2, $4, [[BB1:(\$|\.L)[A-Z_0-9]+]]
+; MIPSR6: bnec $2, $4, [[BB1:(\$|\.L)[A-Z_0-9]+]]
; ALL: sc $[[R2:[0-9]+]], 0($[[R0]])
-; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]]
-; MICROMIPS: beqzc $[[R2]], $[[BB0]]
-; MIPSR6: beqzc $[[R2]], $[[BB0]]
-; ALL: $[[BB1]]:
+; NOT-MICROMIPS: beqz $[[R2]], [[BB0]]
+; MICROMIPS: beqzc $[[R2]], [[BB0]]
+; MIPSR6: beqzc $[[R2]], [[BB0]]
+; ALL: [[BB1]]:
}
@@ -141,20 +141,20 @@ entry:
; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
-; O0: $[[BB0:[A-Z_0-9]+]]:
+; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; O0: ld $[[R10:[0-9]+]]
; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]])
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R12:[0-9]+]], 0($[[R2]])
; ALL: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]]
; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]]
; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]]
; ALL: sc $[[R16]], 0($[[R2]])
-; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]]
-; MICROMIPS: beqzc $[[R16]], $[[BB0]]
-; MIPSR6: beqzc $[[R16]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R16]], [[BB0]]
+; MICROMIPS: beqzc $[[R16]], [[BB0]]
+; MIPSR6: beqzc $[[R16]], [[BB0]]
; ALL: and $[[R17:[0-9]+]], $[[R12]], $[[R7]]
; ALL: srlv $[[R18:[0-9]+]], $[[R17]], $[[R5]]
@@ -186,20 +186,20 @@ entry:
; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
-; O0: $[[BB0:[A-Z_0-9]+]]:
+; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; O0: ld $[[R10:[0-9]+]]
; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]])
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R12:[0-9]+]], 0($[[R2]])
; ALL: subu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]]
; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]]
; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]]
; ALL: sc $[[R16]], 0($[[R2]])
-; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]]
-; MICROMIPS: beqzc $[[R16]], $[[BB0]]
-; MIPSR6: beqzc $[[R16]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R16]], [[BB0]]
+; MICROMIPS: beqzc $[[R16]], [[BB0]]
+; MIPSR6: beqzc $[[R16]], [[BB0]]
; ALL: and $[[R17:[0-9]+]], $[[R12]], $[[R7]]
; ALL: srlv $[[R18:[0-9]+]], $[[R17]], $[[R5]]
@@ -231,11 +231,11 @@ entry:
; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
-; O0: $[[BB0:[A-Z_0-9]+]]:
+; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; O0: ld $[[R10:[0-9]+]]
; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]])
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R12:[0-9]+]], 0($[[R2]])
; ALL: and $[[R13:[0-9]+]], $[[R12]], $[[R9]]
; ALL: nor $[[R14:[0-9]+]], $zero, $[[R13]]
@@ -243,9 +243,9 @@ entry:
; ALL: and $[[R16:[0-9]+]], $[[R12]], $[[R8]]
; ALL: or $[[R17:[0-9]+]], $[[R16]], $[[R15]]
; ALL: sc $[[R17]], 0($[[R2]])
-; NOT-MICROMIPS: beqz $[[R17]], $[[BB0]]
-; MICROMIPS: beqzc $[[R17]], $[[BB0]]
-; MIPSR6: beqzc $[[R17]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R17]], [[BB0]]
+; MICROMIPS: beqzc $[[R17]], [[BB0]]
+; MIPSR6: beqzc $[[R17]], [[BB0]]
; ALL: and $[[R18:[0-9]+]], $[[R12]], $[[R7]]
; ALL: srlv $[[R19:[0-9]+]], $[[R18]], $[[R5]]
@@ -277,15 +277,15 @@ entry:
; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R10:[0-9]+]], 0($[[R2]])
; ALL: and $[[R18:[0-9]+]], $[[R9]], $[[R7]]
; ALL: and $[[R13:[0-9]+]], $[[R10]], $[[R8]]
; ALL: or $[[R14:[0-9]+]], $[[R13]], $[[R18]]
; ALL: sc $[[R14]], 0($[[R2]])
-; NOT-MICROMIPS: beqz $[[R14]], $[[BB0]]
-; MICROMIPS: beqzc $[[R14]], $[[BB0]]
-; MIPSR6: beqzc $[[R14]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R14]], [[BB0]]
+; MICROMIPS: beqzc $[[R14]], [[BB0]]
+; MIPSR6: beqzc $[[R14]], [[BB0]]
; ALL: and $[[R15:[0-9]+]], $[[R10]], $[[R7]]
; ALL: srlv $[[R16:[0-9]+]], $[[R15]], $[[R5]]
@@ -322,21 +322,21 @@ entry:
; ALL: andi $[[R11:[0-9]+]], $5, 255
; ALL: sllv $[[R12:[0-9]+]], $[[R11]], $[[R5]]
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R13:[0-9]+]], 0($[[R2]])
; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]]
-; NOT-MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
-; MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
-; MIPSR6: bnec $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
+; NOT-MICROMIPS: bne $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]]
+; MICROMIPS: bne $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]]
+; MIPSR6: bnec $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]]
; ALL: and $[[R15:[0-9]+]], $[[R13]], $[[R8]]
; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R12]]
; ALL: sc $[[R16]], 0($[[R2]])
-; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]]
-; MICROMIPS: beqzc $[[R16]], $[[BB0]]
-; MIPSR6: beqzc $[[R16]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R16]], [[BB0]]
+; MICROMIPS: beqzc $[[R16]], [[BB0]]
+; MIPSR6: beqzc $[[R16]], [[BB0]]
-; ALL: $[[BB1]]:
+; ALL: [[BB1]]:
; ALL: srlv $[[R17:[0-9]+]], $[[R14]], $[[R5]]
; NO-SEB-SEH: sll $[[R18:[0-9]+]], $[[R17]], 24
@@ -366,21 +366,21 @@ entry:
; ALL: andi $[[R11:[0-9]+]], $6, 255
; ALL: sllv $[[R12:[0-9]+]], $[[R11]], $[[R5]]
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R13:[0-9]+]], 0($[[R2]])
; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]]
-; NOT-MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
-; MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
-; MIPSR6: bnec $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
+; NOT-MICROMIPS: bne $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]]
+; MICROMIPS: bne $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]]
+; MIPSR6: bnec $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]]
; ALL: and $[[R15:[0-9]+]], $[[R13]], $[[R8]]
; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R12]]
; ALL: sc $[[R16]], 0($[[R2]])
-; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]]
-; MICROMIPS: beqzc $[[R16]], $[[BB0]]
-; MIPSR6: beqzc $[[R16]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R16]], [[BB0]]
+; MICROMIPS: beqzc $[[R16]], [[BB0]]
+; MIPSR6: beqzc $[[R16]], [[BB0]]
-; ALL: $[[BB1]]:
+; ALL: [[BB1]]:
; ALL: srlv $[[R17:[0-9]+]], $[[R14]], $[[R5]]
; NO-SEB-SEH: sll $[[R18:[0-9]+]], $[[R17]], 24
@@ -423,20 +423,20 @@ entry:
; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
-; O0: $[[BB0:[A-Z_0-9]+]]:
+; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; O0: ld $[[R10:[0-9]+]]
; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]])
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R12:[0-9]+]], 0($[[R2]])
; ALL: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]]
; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]]
; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]]
; ALL: sc $[[R16]], 0($[[R2]])
-; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]]
-; MICROMIPS: beqzc $[[R16]], $[[BB0]]
-; MIPSR6: beqzc $[[R16]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R16]], [[BB0]]
+; MICROMIPS: beqzc $[[R16]], [[BB0]]
+; MIPSR6: beqzc $[[R16]], [[BB0]]
; ALL: and $[[R17:[0-9]+]], $[[R12]], $[[R7]]
; ALL: srlv $[[R18:[0-9]+]], $[[R17]], $[[R5]]
@@ -465,15 +465,15 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) {
; ALL: sync
; ALL: andi $[[R3:[0-9]+]], $[[R2]], 65535
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R4:[0-9]+]], 0($[[R5:[0-9]+]])
; ALL: and $[[R6:[0-9]+]], $[[R4]], $
; ALL: and $[[R7:[0-9]+]], $[[R4]], $
; ALL: or $[[R8:[0-9]+]], $[[R7]], $
; ALL: sc $[[R8]], 0($[[R5]])
-; NOT-MICROMIPS: beqz $[[R8]], $[[BB0]]
-; MICROMIPS: beqzc $[[R8]], $[[BB0]]
-; MIPSR6: beqzc $[[R8]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R8]], [[BB0]]
+; MICROMIPS: beqzc $[[R8]], [[BB0]]
+; MIPSR6: beqzc $[[R8]], [[BB0]]
; ALL: srlv $[[R9:[0-9]+]], $[[R6]], $
@@ -538,11 +538,11 @@ entry:
; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x)(
; ALL: addiu $[[PTR:[0-9]+]], $[[R0]], 1024
-; ALL: $[[BB0:[A-Z_0-9]+]]:
+; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
; ALL: ll $[[R1:[0-9]+]], 0($[[PTR]])
; ALL: addu $[[R2:[0-9]+]], $[[R1]], $4
; ALL: sc $[[R2]], 0($[[PTR]])
-; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]]
-; MICROMIPS: beqzc $[[R2]], $[[BB0]]
-; MIPSR6: beqzc $[[R2]], $[[BB0]]
+; NOT-MICROMIPS: beqz $[[R2]], [[BB0]]
+; MICROMIPS: beqzc $[[R2]], [[BB0]]
+; MIPSR6: beqzc $[[R2]], [[BB0]]
}
diff --git a/test/CodeGen/Mips/blez_bgez.ll b/test/CodeGen/Mips/blez_bgez.ll
index dcda047f8d09..84c8af45db81 100644
--- a/test/CodeGen/Mips/blez_bgez.ll
+++ b/test/CodeGen/Mips/blez_bgez.ll
@@ -2,7 +2,7 @@
; RUN: llc -march=mips64el < %s | FileCheck %s
; CHECK-LABEL: test_blez:
-; CHECK: blez ${{[0-9]+}}, $BB
+; CHECK: blez ${{[0-9]+}}, {{\$|\.L}}BB
define void @test_blez(i32 %a) {
entry:
@@ -20,7 +20,7 @@ if.end:
declare void @foo1()
; CHECK-LABEL: test_bgez:
-; CHECK: bgez ${{[0-9]+}}, $BB
+; CHECK: bgez ${{[0-9]+}}, {{\$|\.L}}BB
define void @test_bgez(i32 %a) {
entry:
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index f74363702af5..9bc9a305a204 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -22,22 +22,22 @@ entry:
; STATIC-O32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
; STATIC-O32: lui $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
; STATIC-O32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
-; PIC-N32: lw $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
-; PIC-N32: addiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
-; PIC-N32: lw $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
-; PIC-N32: addiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
-; STATIC-N32: lui $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]])
-; STATIC-N32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
-; STATIC-N32: lui $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
-; STATIC-N32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
-; PIC-N64: ld $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
-; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
-; PIC-N64: ld $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
-; PIC-N64: daddiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
-; STATIC-N64: ld $[[R2:[0-9]+]], %got_page($tmp[[T2:[0-9]+]])
-; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst($tmp[[T2]])
-; STATIC-N64: ld $[[R3:[0-9]+]], %got_page($tmp[[T3:[0-9]+]])
-; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst($tmp[[T3]])
+; PIC-N32: lw $[[R0:[0-9]+]], %got_page(.Ltmp[[T0:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R0]], %got_ofst(.Ltmp[[T0]])
+; PIC-N32: lw $[[R1:[0-9]+]], %got_page(.Ltmp[[T1:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R1]], %got_ofst(.Ltmp[[T1]])
+; STATIC-N32: lui $[[R2:[0-9]+]], %hi(.Ltmp[[T2:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R2]], %lo(.Ltmp[[T2]])
+; STATIC-N32: lui $[[R3:[0-9]+]], %hi(.Ltmp[[T3:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R3]], %lo(.Ltmp[[T3]])
+; PIC-N64: ld $[[R0:[0-9]+]], %got_page(.Ltmp[[T0:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst(.Ltmp[[T0]])
+; PIC-N64: ld $[[R1:[0-9]+]], %got_page(.Ltmp[[T1:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R1]], %got_ofst(.Ltmp[[T1]])
+; STATIC-N64: ld $[[R2:[0-9]+]], %got_page(.Ltmp[[T2:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst(.Ltmp[[T2]])
+; STATIC-N64: ld $[[R3:[0-9]+]], %got_page(.Ltmp[[T3:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst(.Ltmp[[T3]])
; STATIC-MIPS16-1: .ent f
; STATIC-MIPS16-2: .ent f
; STATIC-MIPS16-1: li $[[R1_16:[0-9]+]], %hi($tmp[[TI_16:[0-9]+]])
diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll
index d6d47678590a..9352294991aa 100644
--- a/test/CodeGen/Mips/ehframe-indirect.ll
+++ b/test/CodeGen/Mips/ehframe-indirect.ll
@@ -33,9 +33,15 @@ declare void @foo()
; ALL: GCC_except_table{{[0-9]+}}:
; ALL: .byte 155 # @TType Encoding = indirect pcrel sdata4
-; ALL: $[[PC_LABEL:tmp[0-9]+]]:
-; ALL: .4byte ($_ZTISt9exception.DW.stub)-($[[PC_LABEL]])
-; ALL: $_ZTISt9exception.DW.stub:
+; O32: [[PC_LABEL:\$tmp[0-9]+]]:
+; N32: [[PC_LABEL:\.Ltmp[0-9]+]]:
+; N64: [[PC_LABEL:\.Ltmp[0-9]+]]:
+; O32: .4byte ($_ZTISt9exception.DW.stub)-([[PC_LABEL]])
+; N32: .4byte .L_ZTISt9exception.DW.stub-[[PC_LABEL]]
+; N64: .4byte .L_ZTISt9exception.DW.stub-[[PC_LABEL]]
+; O32: $_ZTISt9exception.DW.stub:
+; N32: .L_ZTISt9exception.DW.stub:
+; N64: .L_ZTISt9exception.DW.stub:
; O32: .4byte _ZTISt9exception
; N32: .4byte _ZTISt9exception
; N64: .8byte _ZTISt9exception
diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll
index 142ee1144bbe..bd04ed0211f5 100644
--- a/test/CodeGen/Mips/fcmp.ll
+++ b/test/CodeGen/Mips/fcmp.ll
@@ -1076,12 +1076,12 @@ entry:
; 32-CMP-DAG: bnezc $[[T4]],
; 64-C-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12
-; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)(
+; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI32_0)(
; 64-C-DAG: c.ole.s $[[T0]], $[[T1]]
; 64-C-DAG: bc1t
; 64-CMP-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12
-; 64-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)(
+; 64-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI32_0)(
; 64-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
; FIXME: This instruction is redundant.
@@ -1106,8 +1106,8 @@ entry:
; MM64R6-DAG: daddu $[[T1:[0-9]+]], $[[T0]], $25
; MM64R6-DAG: daddiu $[[T2:[0-9]+]], $[[T1]], %lo(%neg(%gp_rel(bug1_f32)))
; MM64R6-DAG: add.s $[[T3:f[0-9]+]], $f13, $f12
-; MM64R6-DAG: ld $[[T4:[0-9]+]], %got_page($CPI32_0)($[[T2]])
-; MM64R6-DAG: lwc1 $[[T5:f[0-9]+]], %got_ofst($CPI32_0)($[[T4]])
+; MM64R6-DAG: ld $[[T4:[0-9]+]], %got_page(.LCPI32_0)($[[T2]])
+; MM64R6-DAG: lwc1 $[[T5:f[0-9]+]], %got_ofst(.LCPI32_0)($[[T4]])
; MM64R6-DAG: cmp.le.s $[[T6:f[0-9]+]], $[[T3]], $[[T5]]
; MM64R6-DAG: mfc1 $[[T7:[0-9]+]], $[[T6]]
; MM64R6-DAG: andi16 $[[T8:[0-9]+]], $[[T7]], 1
@@ -1145,12 +1145,12 @@ entry:
; 32-CMP-DAG: bnezc $[[T4]],
; 64-C-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12
-; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)(
+; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI33_0)(
; 64-C-DAG: c.ole.d $[[T0]], $[[T1]]
; 64-C-DAG: bc1t
; 64-CMP-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12
-; 64-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)(
+; 64-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI33_0)(
; 64-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
; FIXME: This instruction is redundant.
@@ -1175,8 +1175,8 @@ entry:
; MM64R6-DAG: daddu $[[T1:[0-9]+]], $[[T0]], $25
; MM64R6-DAG: daddiu $[[T2:[0-9]+]], $[[T1]], %lo(%neg(%gp_rel(bug1_f64)))
; MM64R6-DAG: add.d $[[T3:f[0-9]+]], $f13, $f12
-; MM64R6-DAG: ld $[[T4:[0-9]+]], %got_page($CPI33_0)($[[T2]])
-; MM64R6-DAG: ldc1 $[[T5:f[0-9]+]], %got_ofst($CPI33_0)($[[T4]])
+; MM64R6-DAG: ld $[[T4:[0-9]+]], %got_page(.LCPI33_0)($[[T2]])
+; MM64R6-DAG: ldc1 $[[T5:f[0-9]+]], %got_ofst(.LCPI33_0)($[[T4]])
; MM64R6-DAG: cmp.le.d $[[T6:f[0-9]+]], $[[T3]], $[[T5]]
; MM64R6-DAG: mfc1 $[[T7:[0-9]+]], $[[T6]]
; MM64R6-DAG: andi16 $[[T8:[0-9]+]], $[[T7]], 1
diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll
index bf1b045dbf28..7fb508f606b2 100644
--- a/test/CodeGen/Mips/fpbr.ll
+++ b/test/CodeGen/Mips/fpbr.ll
@@ -10,8 +10,9 @@ entry:
; ALL-LABEL: func0:
; 32-FCC: c.eq.s $f12, $f14
+; 32-FCC: bc1f $BB0_2
; 64-FCC: c.eq.s $f12, $f13
-; FCC: bc1f $BB0_2
+; 64-FCC: bc1f .LBB0_2
; 32-GPR: cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $f14
; 64-GPR: cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $f13
@@ -19,7 +20,7 @@ entry:
; FIXME: We ought to be able to transform not+bnez -> beqz
; GPR: not $[[GPRCC]], $[[GPRCC]]
; 32-GPR: bnez $[[GPRCC]], $BB0_2
-; 64-GPR: bnezc $[[GPRCC]], $BB0_2
+; 64-GPR: bnezc $[[GPRCC]], .LBB0_2
%cmp = fcmp oeq float %f2, %f3
br i1 %cmp, label %if.then, label %if.else
@@ -45,15 +46,16 @@ entry:
; ALL-LABEL: func1:
; 32-FCC: c.olt.s $f12, $f14
+; 32-FCC: bc1f $BB1_2
; 64-FCC: c.olt.s $f12, $f13
-; FCC: bc1f $BB1_2
+; 64-FCC: bc1f .LBB1_2
; 32-GPR: cmp.ule.s $[[FGRCC:f[0-9]+]], $f14, $f12
; 64-GPR: cmp.ule.s $[[FGRCC:f[0-9]+]], $f13, $f12
; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
; GPR-NOT: not $[[GPRCC]], $[[GPRCC]]
; 32-GPR: bnez $[[GPRCC]], $BB1_2
-; 64-GPR: bnezc $[[GPRCC]], $BB1_2
+; 64-GPR: bnezc $[[GPRCC]], .LBB1_2
%cmp = fcmp olt float %f2, %f3
br i1 %cmp, label %if.then, label %if.else
@@ -75,15 +77,16 @@ entry:
; ALL-LABEL: func2:
; 32-FCC: c.ole.s $f12, $f14
+; 32-FCC: bc1t $BB2_2
; 64-FCC: c.ole.s $f12, $f13
-; FCC: bc1t $BB2_2
+; 64-FCC: bc1t .LBB2_2
; 32-GPR: cmp.ult.s $[[FGRCC:f[0-9]+]], $f14, $f12
; 64-GPR: cmp.ult.s $[[FGRCC:f[0-9]+]], $f13, $f12
; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
; GPR-NOT: not $[[GPRCC]], $[[GPRCC]]
; 32-GPR: beqz $[[GPRCC]], $BB2_2
-; 64-GPR: beqzc $[[GPRCC]], $BB2_2
+; 64-GPR: beqzc $[[GPRCC]], .LBB2_2
%cmp = fcmp ugt float %f2, %f3
br i1 %cmp, label %if.else, label %if.then
@@ -105,8 +108,9 @@ entry:
; ALL-LABEL: func3:
; 32-FCC: c.eq.d $f12, $f14
+; 32-FCC: bc1f $BB3_2
; 64-FCC: c.eq.d $f12, $f13
-; FCC: bc1f $BB3_2
+; 64-FCC: bc1f .LBB3_2
; 32-GPR: cmp.eq.d $[[FGRCC:f[0-9]+]], $f12, $f14
; 64-GPR: cmp.eq.d $[[FGRCC:f[0-9]+]], $f12, $f13
@@ -114,7 +118,7 @@ entry:
; FIXME: We ought to be able to transform not+bnez -> beqz
; GPR: not $[[GPRCC]], $[[GPRCC]]
; 32-GPR: bnez $[[GPRCC]], $BB3_2
-; 64-GPR: bnezc $[[GPRCC]], $BB3_2
+; 64-GPR: bnezc $[[GPRCC]], .LBB3_2
%cmp = fcmp oeq double %f2, %f3
br i1 %cmp, label %if.then, label %if.else
@@ -136,15 +140,16 @@ entry:
; ALL-LABEL: func4:
; 32-FCC: c.olt.d $f12, $f14
+; 32-FCC: bc1f $BB4_2
; 64-FCC: c.olt.d $f12, $f13
-; FCC: bc1f $BB4_2
+; 64-FCC: bc1f .LBB4_2
; 32-GPR: cmp.ule.d $[[FGRCC:f[0-9]+]], $f14, $f12
; 64-GPR: cmp.ule.d $[[FGRCC:f[0-9]+]], $f13, $f12
; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
; GPR-NOT: not $[[GPRCC]], $[[GPRCC]]
; 32-GPR: bnez $[[GPRCC]], $BB4_2
-; 64-GPR: bnezc $[[GPRCC]], $BB4_2
+; 64-GPR: bnezc $[[GPRCC]], .LBB4_2
%cmp = fcmp olt double %f2, %f3
br i1 %cmp, label %if.then, label %if.else
@@ -166,15 +171,16 @@ entry:
; ALL-LABEL: func5:
; 32-FCC: c.ole.d $f12, $f14
+; 32-FCC: bc1t $BB5_2
; 64-FCC: c.ole.d $f12, $f13
-; FCC: bc1t $BB5_2
+; 64-FCC: bc1t .LBB5_2
; 32-GPR: cmp.ult.d $[[FGRCC:f[0-9]+]], $f14, $f12
; 64-GPR: cmp.ult.d $[[FGRCC:f[0-9]+]], $f13, $f12
; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
; GPR-NOT: not $[[GPRCC]], $[[GPRCC]]
; 32-GPR: beqz $[[GPRCC]], $BB5_2
-; 64-GPR: beqzc $[[GPRCC]], $BB5_2
+; 64-GPR: beqzc $[[GPRCC]], .LBB5_2
%cmp = fcmp ugt double %f2, %f3
br i1 %cmp, label %if.else, label %if.then
diff --git a/test/CodeGen/Mips/jumptable_labels.ll b/test/CodeGen/Mips/jumptable_labels.ll
new file mode 100644
index 000000000000..8c7edc10689f
--- /dev/null
+++ b/test/CodeGen/Mips/jumptable_labels.ll
@@ -0,0 +1,75 @@
+; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64 -target-abi=n32 < %s | FileCheck %s -check-prefix=N32
+; RUN: llc -march=mips64 < %s | FileCheck %s -check-prefix=N64
+
+; We only use the '$' prefix on O32. The others use the ELF convention.
+; O32: $JTI0_0
+; N32: .LJTI0_0
+; N64: .LJTI0_0
+
+; Check basic block labels while we're at it.
+; O32: $BB0_2:
+; N32: .LBB0_2:
+; N64: .LBB0_2:
+
+@.str = private unnamed_addr constant [2 x i8] c"A\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"B\00", align 1
+@.str.2 = private unnamed_addr constant [2 x i8] c"C\00", align 1
+@.str.3 = private unnamed_addr constant [2 x i8] c"D\00", align 1
+@.str.4 = private unnamed_addr constant [2 x i8] c"E\00", align 1
+@.str.5 = private unnamed_addr constant [2 x i8] c"F\00", align 1
+@.str.6 = private unnamed_addr constant [2 x i8] c"G\00", align 1
+@.str.7 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+define i8* @_Z3fooi(i32 signext %Letter) {
+entry:
+ %retval = alloca i8*, align 8
+ %Letter.addr = alloca i32, align 4
+ store i32 %Letter, i32* %Letter.addr, align 4
+ %0 = load i32, i32* %Letter.addr, align 4
+ switch i32 %0, label %sw.epilog [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb4
+ i32 5, label %sw.bb5
+ i32 6, label %sw.bb6
+ ]
+
+sw.bb:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb1:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb2:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb3:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb4:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb5:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.5, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb6:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.6, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.epilog:
+ store i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.7, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+return:
+ %1 = load i8*, i8** %retval, align 8
+ ret i8* %1
+}
diff --git a/test/CodeGen/Mips/llvm-ir/ashr.ll b/test/CodeGen/Mips/llvm-ir/ashr.ll
index af9b81f9203f..cfb9855e6438 100644
--- a/test/CodeGen/Mips/llvm-ir/ashr.ll
+++ b/test/CodeGen/Mips/llvm-ir/ashr.ll
@@ -167,18 +167,18 @@ entry:
; M3: sll $[[T0:[0-9]+]], $7, 0
; M3: dsrav $[[T1:[0-9]+]], $4, $7
; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
- ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]]
+ ; M3: bnez $[[T3:[0-9]+]], [[BB0:.LBB[0-9_]+]]
; M3: move $3, $[[T1]]
; M3: dsrlv $[[T4:[0-9]+]], $5, $7
; M3: dsll $[[T5:[0-9]+]], $4, 1
; M3: not $[[T6:[0-9]+]], $[[T0]]
; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
; M3: or $3, $[[T7]], $[[T4]]
- ; M3: $[[BB0]]:
- ; M3: beqz $[[T3]], $[[BB1:BB[0-9_]+]]
+ ; M3: [[BB0]]:
+ ; M3: beqz $[[T3]], [[BB1:.LBB[0-9_]+]]
; M3: nop
; M3: dsra $2, $4, 63
- ; M3: $[[BB1]]:
+ ; M3: [[BB1]]:
; M3: jr $ra
; M3: nop
diff --git a/test/CodeGen/Mips/llvm-ir/indirectbr.ll b/test/CodeGen/Mips/llvm-ir/indirectbr.ll
index d982b570d7c2..8fed32aee9be 100644
--- a/test/CodeGen/Mips/llvm-ir/indirectbr.ll
+++ b/test/CodeGen/Mips/llvm-ir/indirectbr.ll
@@ -18,13 +18,13 @@ define i32 @br(i8 *%addr) {
; R6C: jrc $4 # <MCInst #{{[0-9]+}} JIC
-; ALL: $BB0_1: # %L1
+; ALL: {{\$|\.L}}BB0_1: # %L1
; NOT-R6: jr $ra # <MCInst #{{[0-9]+}} JR
; R6: jr $ra # <MCInst #{{[0-9]+}} JALR
; R6C: jr $ra # <MCInst #{{[0-9]+}} JALR
; ALL: addiu $2, $zero, 0
-; ALL: $BB0_2: # %L2
+; ALL: {{\$|\.L}}BB0_2: # %L2
; NOT-R6: jr $ra # <MCInst #{{[0-9]+}} JR
; R6: jr $ra # <MCInst #{{[0-9]+}} JALR
; R6C: jr $ra # <MCInst #{{[0-9]+}} JALR
diff --git a/test/CodeGen/Mips/llvm-ir/lshr.ll b/test/CodeGen/Mips/llvm-ir/lshr.ll
index 10748b9c803a..63fb075a4ad6 100644
--- a/test/CodeGen/Mips/llvm-ir/lshr.ll
+++ b/test/CodeGen/Mips/llvm-ir/lshr.ll
@@ -158,18 +158,18 @@ entry:
; M3: sll $[[T0:[0-9]+]], $7, 0
; M3: dsrlv $[[T1:[0-9]+]], $4, $7
; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
- ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]]
+ ; M3: bnez $[[T3:[0-9]+]], [[BB0:\.LBB[0-9_]+]]
; M3: move $3, $[[T1]]
; M3: dsrlv $[[T4:[0-9]+]], $5, $7
; M3: dsll $[[T5:[0-9]+]], $4, 1
; M3: not $[[T6:[0-9]+]], $[[T0]]
; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
; M3: or $3, $[[T7]], $[[T4]]
- ; M3: $[[BB0]]:
- ; M3: bnez $[[T3]], $[[BB1:BB[0-9_]+]]
+ ; M3: [[BB0]]:
+ ; M3: bnez $[[T3]], [[BB1:\.LBB[0-9_]+]]
; M3: daddiu $2, $zero, 0
; M3: move $2, $[[T1]]
- ; M3: $[[BB1]]:
+ ; M3: [[BB1]]:
; M3: jr $ra
; M3: nop
diff --git a/test/CodeGen/Mips/llvm-ir/select-dbl.ll b/test/CodeGen/Mips/llvm-ir/select-dbl.ll
index 1ca5b4e054ba..42f02c4cbabf 100644
--- a/test/CodeGen/Mips/llvm-ir/select-dbl.ll
+++ b/test/CodeGen/Mips/llvm-ir/select-dbl.ll
@@ -58,10 +58,10 @@ entry:
; SEL-32: sel.d $f0, $[[F1]], $[[F0]]
; M3: andi $[[T0:[0-9]+]], $4, 1
- ; M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+ ; M3: bnez $[[T0]], [[BB0:.LBB[0-9_]+]]
; M3: nop
; M3: mov.d $f13, $f14
- ; M3: $[[BB0]]:
+ ; M3: [[BB0]]:
; M3: jr $ra
; M3: mov.d $f0, $f13
@@ -106,10 +106,10 @@ entry:
; SEL-32: sel.d $f0, $f14, $f12
; M3: andi $[[T0:[0-9]+]], $6, 1
- ; M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+ ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M3: nop
; M3: mov.d $f12, $f13
- ; M3: $[[BB0]]:
+ ; M3: [[BB0]]:
; M3: jr $ra
; M3: mov.d $f0, $f12
@@ -135,11 +135,12 @@ entry:
; M2: c.olt.d $f12, $f14
; M3: c.olt.d $f12, $f13
- ; M2-M3: bc1t $[[BB0:BB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -172,11 +173,12 @@ entry:
; M2: c.ole.d $f12, $f14
; M3: c.ole.d $f12, $f13
- ; M2-M3: bc1t $[[BB0:BB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -209,11 +211,12 @@ entry:
; M2: c.ule.d $f12, $f14
; M3: c.ule.d $f12, $f13
- ; M2-M3: bc1f $[[BB0:BB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -246,11 +249,12 @@ entry:
; M2: c.ult.d $f12, $f14
; M3: c.ult.d $f12, $f13
- ; M2-M3: bc1f $[[BB0:BB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -283,11 +287,12 @@ entry:
; M2: c.eq.d $f12, $f14
; M3: c.eq.d $f12, $f13
- ; M2-M3: bc1t $[[BB0:BB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -320,11 +325,12 @@ entry:
; M2: c.ueq.d $f12, $f14
; M3: c.ueq.d $f12, $f13
- ; M2-M3: bc1f $[[BB0:BB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
diff --git a/test/CodeGen/Mips/llvm-ir/select-flt.ll b/test/CodeGen/Mips/llvm-ir/select-flt.ll
index 6a0334da4833..e632897e76cd 100644
--- a/test/CodeGen/Mips/llvm-ir/select-flt.ll
+++ b/test/CodeGen/Mips/llvm-ir/select-flt.ll
@@ -34,12 +34,13 @@ entry:
; ALL-LABEL: tst_select_i1_float:
; M2-M3: andi $[[T0:[0-9]+]], $4, 1
- ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+ ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
+ ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: jr $ra
; M2: mtc1 $6, $f0
; M3: mov.s $f13, $f14
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2: mtc1 $5, $f0
; M3: mov.s $f0, $f13
@@ -76,11 +77,12 @@ entry:
; ALL-LABEL: tst_select_i1_float_reordered:
; M2-M3: andi $[[T0:[0-9]+]], $6, 1
- ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+ ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
+ ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -112,11 +114,12 @@ entry:
; M2: c.olt.s $f12, $f14
; M3: c.olt.s $f12, $f13
- ; M2-M3: bc1t $[[BB0:BB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -149,11 +152,12 @@ entry:
; M2: c.ole.s $f12, $f14
; M3: c.ole.s $f12, $f13
- ; M2-M3: bc1t $[[BB0:BB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -186,11 +190,12 @@ entry:
; M2: c.ule.s $f12, $f14
; M3: c.ule.s $f12, $f13
- ; M2-M3: bc1f $[[BB0:BB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -223,11 +228,12 @@ entry:
; M2: c.ult.s $f12, $f14
; M3: c.ult.s $f12, $f13
- ; M2-M3: bc1f $[[BB0:BB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -260,11 +266,12 @@ entry:
; M2: c.eq.s $f12, $f14
; M3: c.eq.s $f12, $f13
- ; M2-M3: bc1t $[[BB0:BB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -297,11 +304,12 @@ entry:
; M2: c.ueq.s $f12, $f14
; M3: c.ueq.s $f12, $f13
- ; M2-M3: bc1f $[[BB0:BB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
diff --git a/test/CodeGen/Mips/llvm-ir/select-int.ll b/test/CodeGen/Mips/llvm-ir/select-int.ll
index e8f78ffdcb6a..5bee3f1dbd52 100644
--- a/test/CodeGen/Mips/llvm-ir/select-int.ll
+++ b/test/CodeGen/Mips/llvm-ir/select-int.ll
@@ -35,10 +35,11 @@ entry:
; ALL-LABEL: tst_select_i1_i1:
; M2-M3: andi $[[T0:[0-9]+]], $4, 1
- ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+ ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
+ ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2-M3: move $5, $6
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: move $2, $5
@@ -70,10 +71,11 @@ entry:
; ALL-LABEL: tst_select_i1_i8:
; M2-M3: andi $[[T0:[0-9]+]], $4, 1
- ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+ ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
+ ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2-M3: move $5, $6
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: move $2, $5
@@ -105,10 +107,11 @@ entry:
; ALL-LABEL: tst_select_i1_i32:
; M2-M3: andi $[[T0:[0-9]+]], $4, 1
- ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+ ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
+ ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
; M2-M3: move $5, $6
- ; M2-M3: $[[BB0]]:
+ ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: move $2, $5
@@ -170,10 +173,10 @@ entry:
; SEL-32: or $3, $[[T4]], $[[T6]]
; M3: andi $[[T0:[0-9]+]], $4, 1
- ; M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+ ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M3: nop
; M3: move $5, $6
- ; M3: $[[BB0]]:
+ ; M3: [[BB0]]:
; M3: jr $ra
; M3: move $2, $5
@@ -214,19 +217,19 @@ define i8* @tst_select_word_cst(i8* %a, i8* %b) {
; M2: addiu $[[T0:[0-9]+]], $zero, -1
; M2: xor $[[T1:[0-9]+]], $5, $[[T0]]
; M2: sltu $[[T2:[0-9]+]], $zero, $[[T1]]
- ; M2: bnez $[[T2]], $[[BB0:BB[0-9_]+]]
+ ; M2: bnez $[[T2]], [[BB0:\$BB[0-9_]+]]
; M2: addiu $2, $zero, 0
; M2: move $2, $4
- ; M2: $[[BB0]]:
+ ; M2: [[BB0]]:
; M2: jr $ra
; M3: daddiu $[[T0:[0-9]+]], $zero, -1
; M3: xor $[[T1:[0-9]+]], $5, $[[T0]]
; M3: sltu $[[T2:[0-9]+]], $zero, $[[T1]]
- ; M3: bnez $[[T2]], $[[BB0:BB[0-9_]+]]
+ ; M3: bnez $[[T2]], [[BB0:\.LBB[0-9_]+]]
; M3: daddiu $2, $zero, 0
; M3: move $2, $4
- ; M3: $[[BB0]]:
+ ; M3: [[BB0]]:
; M3: jr $ra
; CMOV-32: addiu $[[T0:[0-9]+]], $zero, -1
diff --git a/test/CodeGen/Mips/llvm-ir/shl.ll b/test/CodeGen/Mips/llvm-ir/shl.ll
index fa43840a8b7b..74b6155032a3 100644
--- a/test/CodeGen/Mips/llvm-ir/shl.ll
+++ b/test/CodeGen/Mips/llvm-ir/shl.ll
@@ -174,18 +174,18 @@ entry:
; M3: sll $[[T0:[0-9]+]], $7, 0
; M3: dsllv $[[T1:[0-9]+]], $5, $7
; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
- ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]]
+ ; M3: bnez $[[T3:[0-9]+]], [[BB0:\.LBB[0-9_]+]]
; M3: move $2, $[[T1]]
; M3: dsllv $[[T4:[0-9]+]], $4, $7
; M3: dsrl $[[T5:[0-9]+]], $5, 1
; M3: not $[[T6:[0-9]+]], $[[T0]]
; M3: dsrlv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
; M3: or $2, $[[T4]], $[[T7]]
- ; M3: $[[BB0]]:
- ; M3: bnez $[[T3]], $[[BB1:BB[0-9_]+]]
+ ; M3: [[BB0]]:
+ ; M3: bnez $[[T3]], [[BB1:\.LBB[0-9_]+]]
; M3: daddiu $3, $zero, 0
; M3: move $3, $[[T1]]
- ; M3: $[[BB1]]:
+ ; M3: [[BB1]]:
; M3: jr $ra
; M3: nop
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index 06eda11e7888..59e284165d40 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -84,28 +84,28 @@ end:
; Check the MIPS64 version.
; N64: lui $[[R0:[0-9]+]], %hi(%neg(%gp_rel(test1)))
-; N64: bnez $4, $[[BB0:BB[0-9_]+]]
+; N64: bnez $4, [[BB0:\.LBB[0-9_]+]]
; N64: daddu $[[R1:[0-9]+]], $[[R0]], $25
; Check for long branch expansion:
; N64: daddiu $sp, $sp, -16
; N64-NEXT: sd $ra, 0($sp)
-; N64-NEXT: daddiu $1, $zero, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]]))
+; N64-NEXT: daddiu $1, $zero, %hi([[BB2:\.LBB[0-9_]+]]-[[BB1:\.LBB[0-9_]+]])
; N64-NEXT: dsll $1, $1, 16
-; N64-NEXT: bal $[[BB1]]
-; N64-NEXT: daddiu $1, $1, %lo(($[[BB2]])-($[[BB1]]))
-; N64-NEXT: $[[BB1]]:
+; N64-NEXT: bal [[BB1]]
+; N64-NEXT: daddiu $1, $1, %lo([[BB2]]-[[BB1]])
+; N64-NEXT: [[BB1]]:
; N64-NEXT: daddu $1, $ra, $1
; N64-NEXT: ld $ra, 0($sp)
; N64-NEXT: jr $1
; N64-NEXT: daddiu $sp, $sp, 16
-; N64: $[[BB0]]:
+; N64: [[BB0]]:
; N64: daddiu $[[GP:[0-9]+]], $[[R1]], %lo(%neg(%gp_rel(test1)))
; N64: ld $[[R2:[0-9]+]], %got_disp(x)($[[GP]])
; N64: addiu $[[R3:[0-9]+]], $zero, 1
; N64: sw $[[R3]], 0($[[R2]])
-; N64: $[[BB2]]:
+; N64: [[BB2]]:
; N64: jr $ra
; N64: nop
diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll
index 5d253d7af253..d7a05800a273 100644
--- a/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/test/CodeGen/Mips/msa/basic_operations.ll
@@ -36,14 +36,14 @@ define void @const_v16i8() nounwind {
store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 31>, <16 x i8>*@v16i8
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6>, <16 x i8>*@v16i8
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0>, <16 x i8>*@v16i8
@@ -59,8 +59,8 @@ define void @const_v16i8() nounwind {
store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>*@v16i8
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
@@ -77,8 +77,8 @@ define void @const_v8i16() nounwind {
store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 2, i16 1, i16 1, i16 1, i16 31>, <8 x i16>*@v8i16
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>, <8 x i16>*@v8i16
@@ -93,8 +93,8 @@ define void @const_v8i16() nounwind {
store volatile <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>, <8 x i16>*@v8i16
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
@@ -111,8 +111,8 @@ define void @const_v4i32() nounwind {
store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 31>, <4 x i32>*@v4i32
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>, <4 x i32>*@v4i32
@@ -123,14 +123,14 @@ define void @const_v4i32() nounwind {
store volatile <4 x i32> <i32 1, i32 2, i32 1, i32 2>, <4 x i32>*@v4i32
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <4 x i32> <i32 3, i32 4, i32 5, i32 6>, <4 x i32>*@v4i32
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
@@ -156,15 +156,15 @@ define void @const_v2i64() nounwind {
store volatile <2 x i64> <i64 1, i64 31>, <2 x i64>*@v2i64
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
; MIPS64: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x i64> <i64 3, i64 4>, <2 x i64>*@v2i64
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
; MIPS64: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll
index d714b3eec1f2..15468781f308 100644
--- a/test/CodeGen/Mips/msa/basic_operations_float.ll
+++ b/test/CodeGen/Mips/msa/basic_operations_float.ll
@@ -23,8 +23,8 @@ define void @const_v4f32() nounwind {
store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 31.0>, <4 x float>*@v4f32
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <4 x float> <float 65537.0, float 65537.0, float 65537.0, float 65537.0>, <4 x float>*@v4f32
@@ -34,14 +34,14 @@ define void @const_v4f32() nounwind {
store volatile <4 x float> <float 1.0, float 2.0, float 1.0, float 2.0>, <4 x float>*@v4f32
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <4 x float> <float 3.0, float 4.0, float 5.0, float 6.0>, <4 x float>*@v4f32
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
@@ -55,38 +55,38 @@ define void @const_v2f64() nounwind {
store volatile <2 x double> <double 72340172838076673.0, double 72340172838076673.0>, <2 x double>*@v2f64
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 281479271743489.0, double 281479271743489.0>, <2 x double>*@v2f64
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 4294967297.0, double 4294967297.0>, <2 x double>*@v2f64
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 1.0, double 1.0>, <2 x double>*@v2f64
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 1.0, double 31.0>, <2 x double>*@v2f64
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 3.0, double 4.0>, <2 x double>*@v2f64
; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
- ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L
; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
diff --git a/test/CodeGen/Mips/octeon.ll b/test/CodeGen/Mips/octeon.ll
index b441274cec6b..7e2a81019ac4 100644
--- a/test/CodeGen/Mips/octeon.ll
+++ b/test/CodeGen/Mips/octeon.ll
@@ -91,9 +91,9 @@ entry:
define i64 @bbit0(i64 %a) nounwind {
entry:
; ALL-LABEL: bbit0:
-; OCTEON: bbit0 $4, 3, $[[BB0:BB[0-9_]+]]
+; OCTEON: bbit0 $4, 3, [[BB0:(\$|\.L)BB[0-9_]+]]
; MIPS64: andi $[[T0:[0-9]+]], $4, 8
-; MIPS64: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+; MIPS64: bnez $[[T0]], [[BB0:(\$|\.L)BB[0-9_]+]]
%bit = and i64 %a, 8
%res = icmp eq i64 %bit, 0
br i1 %res, label %endif, label %if
@@ -107,11 +107,11 @@ endif:
define i64 @bbit032(i64 %a) nounwind {
entry:
; ALL-LABEL: bbit032:
-; OCTEON: bbit032 $4, 3, $[[BB0:BB[0-9_]+]]
+; OCTEON: bbit032 $4, 3, [[BB0:(\$|\.L)BB[0-9_]+]]
; MIPS64: daddiu $[[T0:[0-9]+]], $zero, 1
; MIPS64: dsll $[[T1:[0-9]+]], $[[T0]], 35
; MIPS64: and $[[T2:[0-9]+]], $4, $[[T1]]
-; MIPS64: bnez $[[T2]], $[[BB0:BB[0-9_]+]]
+; MIPS64: bnez $[[T2]], [[BB0:(\$|\.L)BB[0-9_]+]]
%bit = and i64 %a, 34359738368
%res = icmp eq i64 %bit, 0
br i1 %res, label %endif, label %if
@@ -125,9 +125,9 @@ endif:
define i64 @bbit1(i64 %a) nounwind {
entry:
; ALL-LABEL: bbit1:
-; OCTEON: bbit1 $4, 3, $[[BB0:BB[0-9_]+]]
+; OCTEON: bbit1 $4, 3, [[BB0:(\$|\.L)BB[0-9_]+]]
; MIPS64: andi $[[T0:[0-9]+]], $4, 8
-; MIPS64: beqz $[[T0]], $[[BB0:BB[0-9_]+]]
+; MIPS64: beqz $[[T0]], [[BB0:(\$|\.L)BB[0-9_]+]]
%bit = and i64 %a, 8
%res = icmp ne i64 %bit, 0
br i1 %res, label %endif, label %if
@@ -141,11 +141,11 @@ endif:
define i64 @bbit132(i64 %a) nounwind {
entry:
; ALL-LABEL: bbit132:
-; OCTEON: bbit132 $4, 3, $[[BB0:BB[0-9_]+]]
+; OCTEON: bbit132 $4, 3, [[BB0:(\$|\.L)BB[0-9_]+]]
; MIPS64: daddiu $[[T0:[0-9]+]], $zero, 1
; MIPS64: dsll $[[T1:[0-9]+]], $[[T0]], 35
; MIPS64: and $[[T2:[0-9]+]], $4, $[[T1]]
-; MIPS64: beqz $[[T2]], $[[BB0:BB[0-9_]+]]
+; MIPS64: beqz $[[T2]], [[BB0:(\$|\.L)BB[0-9_]+]]
%bit = and i64 %a, 34359738368
%res = icmp ne i64 %bit, 0
br i1 %res, label %endif, label %if
diff --git a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
index c7cf857e1d44..f886e1ff814d 100644
--- a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -681,10 +681,11 @@ define <2 x i64> @test_mm256_cvttpd_epi32(<4 x double> %a0) nounwind {
; X64-NEXT: vcvttpd2dqy %ymm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
- %cvt = fptosi <4 x double> %a0 to <4 x i32>
+ %cvt = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0)
%res = bitcast <4 x i32> %cvt to <2 x i64>
ret <2 x i64> %res
}
+declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
define <4 x i64> @test_mm256_cvttps_epi32(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_cvttps_epi32:
@@ -696,10 +697,11 @@ define <4 x i64> @test_mm256_cvttps_epi32(<8 x float> %a0) nounwind {
; X64: # BB#0:
; X64-NEXT: vcvttps2dq %ymm0, %ymm0
; X64-NEXT: retq
- %cvt = fptosi <8 x float> %a0 to <8 x i32>
+ %cvt = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0)
%res = bitcast <8 x i32> %cvt to <4 x i64>
ret <4 x i64> %res
}
+declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
define <4 x double> @test_mm256_div_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_div_pd:
diff --git a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
index a7b4c6b285d8..0630fd8a93ca 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -359,35 +359,12 @@ define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
-define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
-; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retl
- %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
-; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
-; CHECK-NEXT: retl
- %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
- ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
-
-
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; add operation forces the execution domain.
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: vpaddb LCPI34_0, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb LCPI32_0, %xmm0, %xmm0
; CHECK-NEXT: vmovdqu %xmm0, (%eax)
; CHECK-NEXT: retl
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 35763297d816..c5d60da8f900 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -3431,6 +3431,39 @@ define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
+define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
+; AVX-LABEL: test_x86_avx_cvtt_pd2dq_256:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx_cvtt_pd2dq_256:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvttpd2dqy %ymm0, %xmm0
+; AVX512VL-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
+; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512VL-NEXT: retl
+ %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
+
+
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; AVX-LABEL: test_x86_avx_dp_ps_256:
; AVX: ## BB#0:
@@ -4552,7 +4585,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX-LABEL: movnt_dq:
; AVX: ## BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vpaddq LCPI254_0, %xmm0, %xmm0
+; AVX-NEXT: vpaddq LCPI256_0, %xmm0, %xmm0
; AVX-NEXT: vmovntdq %ymm0, (%eax)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retl
@@ -4560,7 +4593,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX512VL-LABEL: movnt_dq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddq LCPI254_0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddq LCPI256_0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <2 x i64> %a1, <i64 1, i64 1>
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 914f859927be..d2410e4a0a5d 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -744,6 +744,36 @@ define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
ret <8 x double> %1
}
+define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
+; KNL-LABEL: sitofp_16i1_double:
+; KNL: ## BB#0:
+; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; KNL-NEXT: vcmpltpd %zmm1, %zmm2, %k1
+; KNL-NEXT: vcmpltpd %zmm0, %zmm2, %k2
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k2} {z}
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovqd %zmm1, %ymm1
+; KNL-NEXT: vcvtdq2pd %ymm1, %zmm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sitofp_16i1_double:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k0
+; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1
+; SKX-NEXT: vpmovm2d %k1, %ymm0
+; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0
+; SKX-NEXT: vpmovm2d %k0, %ymm1
+; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1
+; SKX-NEXT: retq
+ %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
+ %1 = sitofp <16 x i1> %cmpres to <16 x double>
+ ret <16 x double> %1
+}
+
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; KNL-LABEL: sitofp_8i1_double:
; KNL: ## BB#0:
@@ -767,6 +797,130 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
ret <8 x double> %1
}
+define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
+; KNL-LABEL: sitofp_8i1_float:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vcmpltps %zmm0, %zmm1, %k1
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vcvtdq2ps %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sitofp_8i1_float:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k0
+; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; SKX-NEXT: retq
+ %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
+ %1 = sitofp <8 x i1> %cmpres to <8 x float>
+ ret <8 x float> %1
+}
+
+define <4 x float> @sitofp_4i1_float(<4 x float> %a) {
+; KNL-LABEL: sitofp_4i1_float:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vcvtdq2ps %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sitofp_4i1_float:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; SKX-NEXT: retq
+ %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
+ %1 = sitofp <4 x i1> %cmpres to <4 x float>
+ ret <4 x float> %1
+}
+
+define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
+; KNL-LABEL: sitofp_4i1_double:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vcvtdq2pd %xmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sitofp_4i1_double:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; SKX-NEXT: retq
+ %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
+ %1 = sitofp <4 x i1> %cmpres to <4 x double>
+ ret <4 x double> %1
+}
+
+define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
+; KNL-LABEL: sitofp_2i1_float:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; KNL-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm1
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: xorl %ecx, %ecx
+; KNL-NEXT: testb $1, %al
+; KNL-NEXT: movl $-1, %eax
+; KNL-NEXT: movl $0, %edx
+; KNL-NEXT: cmovnel %eax, %edx
+; KNL-NEXT: vcvtsi2ssl %edx, %xmm0, %xmm1
+; KNL-NEXT: vmovq %xmm0, %rdx
+; KNL-NEXT: testb $1, %dl
+; KNL-NEXT: cmovnel %eax, %ecx
+; KNL-NEXT: vcvtsi2ssl %ecx, %xmm0, %xmm0
+; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sitofp_2i1_float:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; SKX-NEXT: retq
+ %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
+ %1 = sitofp <2 x i1> %cmpres to <2 x float>
+ ret <2 x float> %1
+}
+
+define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
+; KNL-LABEL: sitofp_2i1_double:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sitofp_2i1_double:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0
+; SKX-NEXT: retq
+ %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
+ %1 = sitofp <2 x i1> %cmpres to <2 x double>
+ ret <2 x double> %1
+}
+
define <16 x float> @uitofp_16i8(<16 x i8>%a) {
; ALL-LABEL: uitofp_16i8:
; ALL: ## BB#0:
@@ -787,3 +941,196 @@ define <16 x float> @uitofp_16i16(<16 x i16>%a) {
ret <16 x float>%b
}
+define <16 x float> @uitofp_16i1_float(<16 x i32> %a) {
+; ALL-LABEL: uitofp_16i1_float:
+; ALL: ## BB#0:
+; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; ALL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
+; ALL-NEXT: retq
+ %mask = icmp slt <16 x i32> %a, zeroinitializer
+ %1 = uitofp <16 x i1> %mask to <16 x float>
+ ret <16 x float> %1
+}
+
+define <16 x double> @uitofp_16i1_double(<16 x i32> %a) {
+; KNL-LABEL: uitofp_16i1_double:
+; KNL: ## BB#0:
+; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; KNL-NEXT: movq {{.*}}(%rip), %rax
+; KNL-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
+; KNL-NEXT: kshiftrw $8, %k1, %k1
+; KNL-NEXT: vpbroadcastq %rax, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovqd %zmm1, %ymm1
+; KNL-NEXT: vcvtudq2pd %ymm1, %zmm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: uitofp_16i1_double:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; SKX-NEXT: movl {{.*}}(%rip), %eax
+; SKX-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
+; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0
+; SKX-NEXT: kshiftrw $8, %k1, %k1
+; SKX-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
+; SKX-NEXT: vcvtudq2pd %ymm1, %zmm1
+; SKX-NEXT: retq
+ %mask = icmp slt <16 x i32> %a, zeroinitializer
+ %1 = uitofp <16 x i1> %mask to <16 x double>
+ ret <16 x double> %1
+}
+
+define <8 x float> @uitofp_8i1_float(<8 x i32> %a) {
+; KNL-LABEL: uitofp_8i1_float:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
+; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT: retq
+;
+; SKX-LABEL: uitofp_8i1_float:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
+; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0
+; SKX-NEXT: retq
+ %mask = icmp slt <8 x i32> %a, zeroinitializer
+ %1 = uitofp <8 x i1> %mask to <8 x float>
+ ret <8 x float> %1
+}
+
+define <8 x double> @uitofp_8i1_double(<8 x i32> %a) {
+; KNL-LABEL: uitofp_8i1_double:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: uitofp_8i1_double:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
+; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0
+; SKX-NEXT: retq
+ %mask = icmp slt <8 x i32> %a, zeroinitializer
+ %1 = uitofp <8 x i1> %mask to <8 x double>
+ ret <8 x double> %1
+}
+
+define <4 x float> @uitofp_4i1_float(<4 x i32> %a) {
+; KNL-LABEL: uitofp_4i1_float:
+; KNL: ## BB#0:
+; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
+; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
+; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT: retq
+;
+; SKX-LABEL: uitofp_4i1_float:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
+; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0
+; SKX-NEXT: retq
+ %mask = icmp slt <4 x i32> %a, zeroinitializer
+ %1 = uitofp <4 x i1> %mask to <4 x float>
+ ret <4 x float> %1
+}
+
+define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
+; KNL-LABEL: uitofp_4i1_double:
+; KNL: ## BB#0:
+; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
+; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
+; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT: retq
+;
+; SKX-LABEL: uitofp_4i1_double:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
+; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0
+; SKX-NEXT: retq
+ %mask = icmp slt <4 x i32> %a, zeroinitializer
+ %1 = uitofp <4 x i1> %mask to <4 x double>
+ ret <4 x double> %1
+}
+
+define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
+; KNL-LABEL: uitofp_2i1_float:
+; KNL: ## BB#0:
+; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm1
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
+; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; KNL-NEXT: retq
+;
+; SKX-LABEL: uitofp_2i1_float:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
+; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0
+; SKX-NEXT: retq
+ %mask = icmp ult <2 x i32> %a, zeroinitializer
+ %1 = uitofp <2 x i1> %mask to <2 x float>
+ ret <2 x float> %1
+}
+
+define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
+; KNL-LABEL: uitofp_2i1_double:
+; KNL: ## BB#0:
+; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; KNL-NEXT: retq
+;
+; SKX-LABEL: uitofp_2i1_double:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
+; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
+; SKX-NEXT: vcvtuqq2pd %xmm0, %xmm0
+; SKX-NEXT: retq
+ %mask = icmp ult <2 x i32> %a, zeroinitializer
+ %1 = uitofp <2 x i1> %mask to <2 x double>
+ ret <2 x double> %1
+}
diff --git a/test/CodeGen/X86/pr28504.ll b/test/CodeGen/X86/pr28504.ll
new file mode 100644
index 000000000000..a617c8aa4f1e
--- /dev/null
+++ b/test/CodeGen/X86/pr28504.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; The test case is rather involved, because we need to get to a state where
+; we have a sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) combine,
+; BUT this combine is only triggered post-legalization, so the setcc's return
+; type is i8. So we can't have the combine opportunity be exposed too early.
+; Basically, what we want to see is that the compare result is zero-extended, and
+; then stored. Only one zext, and no sexts.
+
+; CHECK-LABEL: main:
+; CHECK: movzbl (%rdi), %[[EAX:.*]]
+; CHECK-NEXT: xorl %e[[C:.]]x, %e[[C]]x
+; CHECK-NEXT: cmpl $1, %[[EAX]]
+; CHECK-NEXT: sete %[[C]]l
+; CHECK-NEXT: movl %e[[C]]x, (%rsi)
+define void @main(i8* %p, i32* %q) {
+bb:
+ %tmp4 = load i8, i8* %p, align 1
+ %tmp5 = sext i8 %tmp4 to i32
+ %tmp6 = load i8, i8* %p, align 1
+ %tmp7 = zext i8 %tmp6 to i32
+ %tmp8 = sub nsw i32 %tmp5, %tmp7
+ %tmp11 = icmp eq i32 %tmp7, 1
+ %tmp12 = zext i1 %tmp11 to i32
+ %tmp13 = add nsw i32 %tmp8, %tmp12
+ %tmp14 = trunc i32 %tmp13 to i8
+ %tmp15 = sext i8 %tmp14 to i16
+ %tmp16 = sext i16 %tmp15 to i32
+ store i32 %tmp16, i32* %q, align 4
+ br i1 %tmp11, label %bb21, label %bb22
+
+bb21: ; preds = %bb
+ unreachable
+
+bb22: ; preds = %bb
+ ret void
+}
diff --git a/test/CodeGen/X86/pr28824.ll b/test/CodeGen/X86/pr28824.ll
new file mode 100644
index 000000000000..ced1f00dd01b
--- /dev/null
+++ b/test/CodeGen/X86/pr28824.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
+
+@d = global i32 0, align 4
+
+; Verify the sar happens before ecx is clobbered with the parameter being
+; passed to fn3
+; CHECK-LABEL: fn4
+; CHECK: movb d, %cl
+; CHECK: sarl %cl
+; CHECK: movl $2, %ecx
+define i32 @fn4(i32 %i) #0 {
+entry:
+ %0 = load i32, i32* @d, align 4
+ %shr = ashr i32 %i, %0
+ tail call fastcc void @fn3(i32 2, i32 5, i32 %shr, i32 %i)
+ %cmp = icmp slt i32 %shr, 1
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+}
+
+declare void @fn3(i32 %p1, i32 %p2, i32 %p3, i32 %p4) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel-x86_64.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel-x86_64.ll
index 2102b4211153..aad00e71dda0 100644
--- a/test/CodeGen/X86/sse-intrinsics-fast-isel-x86_64.ll
+++ b/test/CodeGen/X86/sse-intrinsics-fast-isel-x86_64.ll
@@ -6,13 +6,12 @@
define <4 x float> @test_mm_cvtsi64_ss(<4 x float> %a0, i64 %a1) nounwind {
; X64-LABEL: test_mm_cvtsi64_ss:
; X64: # BB#0:
-; X64-NEXT: cvtsi2ssq %rdi, %xmm1
-; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X64-NEXT: cvtsi2ssq %rdi, %xmm0
; X64-NEXT: retq
- %cvt = sitofp i64 %a1 to float
- %res = insertelement <4 x float> %a0, float %cvt, i32 0
+ %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1)
ret <4 x float> %res
}
+declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
define i64 @test_mm_cvtss_si64(<4 x float> %a0) nounwind {
; X64-LABEL: test_mm_cvtss_si64:
@@ -29,7 +28,7 @@ define i64 @test_mm_cvttss_si64(<4 x float> %a0) nounwind {
; X64: # BB#0:
; X64-NEXT: cvttss2si %xmm0, %rax
; X64-NEXT: retq
- %cvt = extractelement <4 x float> %a0, i32 0
- %res = fptosi float %cvt to i64
+ %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
ret i64 %res
}
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 090ddfdfa93a..4715b7f00fcb 100644
--- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -707,20 +707,17 @@ declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind {
; X32-LABEL: test_mm_cvtsi32_ss:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: cvtsi2ssl %eax, %xmm1
-; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X32-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtsi32_ss:
; X64: # BB#0:
-; X64-NEXT: cvtsi2ssl %edi, %xmm1
-; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X64-NEXT: cvtsi2ssl %edi, %xmm0
; X64-NEXT: retq
- %cvt = sitofp i32 %a1 to float
- %res = insertelement <4 x float> %a0, float %cvt, i32 0
+ %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1)
ret <4 x float> %res
}
+declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_cvtss_f32:
@@ -762,10 +759,10 @@ define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind {
; X64: # BB#0:
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: retq
- %cvt = extractelement <4 x float> %a0, i32 0
- %res = fptosi float %cvt to i32
+ %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
ret i32 %res
}
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_cvttss_si32:
@@ -777,8 +774,7 @@ define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind {
; X64: # BB#0:
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: retq
- %cvt = extractelement <4 x float> %a0, i32 0
- %res = fptosi float %cvt to i32
+ %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
ret i32 %res
}
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel-x86_64.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel-x86_64.ll
index f5ecfa444d86..6b9dc40a59e2 100644
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel-x86_64.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel-x86_64.ll
@@ -25,13 +25,12 @@ define i64 @test_mm_cvtsi128_si64(<2 x i64> %a0) nounwind {
define <2 x double> @test_mm_cvtsi64_sd(<2 x double> %a0, i64 %a1) nounwind {
; X64-LABEL: test_mm_cvtsi64_sd:
; X64: # BB#0:
-; X64-NEXT: cvtsi2sdq %rdi, %xmm1
-; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X64-NEXT: cvtsi2sdq %rdi, %xmm0
; X64-NEXT: retq
- %cvt = sitofp i64 %a1 to double
- %res = insertelement <2 x double> %a0, double %cvt, i32 0
+ %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1)
ret <2 x double> %res
}
+declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
define <2 x i64> @test_mm_cvtsi64_si128(i64 %a0) nounwind {
; X64-LABEL: test_mm_cvtsi64_si128:
@@ -48,10 +47,10 @@ define i64 @test_mm_cvttsd_si64(<2 x double> %a0) nounwind {
; X64: # BB#0:
; X64-NEXT: cvttsd2si %xmm0, %rax
; X64-NEXT: retq
- %ext = extractelement <2 x double> %a0, i32 0
- %res = fptosi double %ext to i64
+ %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
ret i64 %res
}
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
define <2 x i64> @test_mm_loadu_si64(i64* %a0) nounwind {
; X64-LABEL: test_mm_loadu_si64:
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index fa71325d7d6e..d3ebba93c769 100644
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1208,6 +1208,39 @@ define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
+; X32-LABEL: test_mm_cvtsd_ss:
+; X32: # BB#0:
+; X32-NEXT: cvtsd2ss %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_cvtsd_ss:
+; X64: # BB#0:
+; X64-NEXT: cvtsd2ss %xmm1, %xmm0
+; X64-NEXT: retq
+ %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+
+define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
+; X32-LABEL: test_mm_cvtsd_ss_load:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movaps (%eax), %xmm1
+; X32-NEXT: cvtsd2ss %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_cvtsd_ss_load:
+; X64: # BB#0:
+; X64-NEXT: movaps (%rdi), %xmm1
+; X64-NEXT: cvtsd2ss %xmm1, %xmm0
+; X64-NEXT: retq
+ %a1 = load <2 x double>, <2 x double>* %p1
+ %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
+ ret <4 x float> %res
+}
+
define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_cvtsi128_si32:
; X32: # BB#0:
@@ -1303,10 +1336,11 @@ define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
; X64: # BB#0:
; X64-NEXT: cvttps2dq %xmm0, %xmm0
; X64-NEXT: retq
- %res = fptosi <4 x float> %a0 to <4 x i32>
+ %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_cvttsd_si32:
@@ -1318,10 +1352,10 @@ define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
; X64: # BB#0:
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: retq
- %ext = extractelement <2 x double> %a0, i32 0
- %res = fptosi double %ext to i32
+ %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
ret i32 %res
}
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_div_pd:
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
index ae6626bb0dc5..27a3fce0be2a 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -66,17 +66,6 @@ define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
-define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
-; CHECK-LABEL: test_x86_sse2_cvttps2dq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
-; CHECK-NEXT: retl
- %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
-
-
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: ## BB#0:
@@ -94,7 +83,7 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: paddb LCPI8_0, %xmm0
+; CHECK-NEXT: paddb LCPI7_0, %xmm0
; CHECK-NEXT: movdqu %xmm0, (%eax)
; CHECK-NEXT: retl
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index 617e30e4b92c..3ae3aecabaf5 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
@@ -274,6 +274,25 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) {
+; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
+; SSE: ## BB#0:
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: movaps (%eax), %xmm1
+; SSE-NEXT: cvtsd2ss %xmm1, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtsd2ss_load:
+; KNL: ## BB#0:
+; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0
+; KNL-NEXT: retl
+ %a1 = load <2 x double>, <2 x double>* %p1
+ %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+
+
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; SSE-LABEL: test_x86_sse2_cvtsi2sd:
; SSE: ## BB#0:
@@ -306,6 +325,25 @@ define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
+; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
+; SSE: ## BB#0:
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: movaps (%eax), %xmm1
+; SSE-NEXT: cvtss2sd %xmm1, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtss2sd_load:
+; KNL: ## BB#0:
+; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0
+; KNL-NEXT: retl
+ %a1 = load <4 x float>, <4 x float>* %p1
+ %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+
+
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttpd2dq:
; SSE: ## BB#0:
@@ -322,6 +360,22 @@ define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
+define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
+; SSE-LABEL: test_x86_sse2_cvttps2dq:
+; SSE: ## BB#0:
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvttps2dq:
+; KNL: ## BB#0:
+; KNL-NEXT: vcvttps2dq %xmm0, %xmm0
+; KNL-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
+
+
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttsd2si:
; SSE: ## BB#0:
diff --git a/test/CodeGen/X86/tail-merge-after-mbp.ll b/test/CodeGen/X86/tail-merge-after-mbp.ll
new file mode 100644
index 000000000000..dc5f3a12bd91
--- /dev/null
+++ b/test/CodeGen/X86/tail-merge-after-mbp.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple=x86_64-linux -o - %s | FileCheck %s
+
+%0 = type { %1, %3* }
+%1 = type { %2* }
+%2 = type { %2*, i8* }
+%3 = type { i32, i32 (i32, i32)* }
+
+
+declare i32 @Up(...)
+declare i32 @f(i32, i32)
+
+; check loop block_14 is not merged with block_21
+; check loop block_11 is not merged with block_18, block_25
+define i32 @foo(%0* nocapture readonly, i32, i1 %c, i8* %p1, %2** %p2) {
+; CHECK-LABEL: foo:
+; CHECK: # %block_11
+; CHECK-NEXT: movq (%r14), %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: je
+; CHECK-NEXT:# %block_14
+; CHECK-NEXT: cmpq $0, 8(%rax)
+; CHECK-NEXT: jne
+; CHECK-NEXT:# %block_18
+; CHECK-NEXT: movq (%r14), %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: je
+; CHECK-NEXT:# %block_21
+; CHECK-NEXT:# =>This Inner Loop Header
+; CHECK-NEXT: cmpq $0, 8(%rax)
+; CHECK-NEXT: jne
+; CHECK-NEXT:# %block_25
+; CHECK-NEXT:# in Loop
+; CHECK-NEXT: movq (%r14), %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: jne
+ br i1 %c, label %block_34, label %block_3
+
+block_3: ; preds = %2
+ br i1 %c, label %block_7, label %block_4
+
+block_4: ; preds = %block_3
+ %a5 = tail call i32 @f(i32 undef, i32 undef)
+ %a6 = icmp eq i32 %a5, 0
+ br i1 %a6, label %block_7, label %block_34
+
+block_7: ; preds = %block_4, %block_3
+ %a8 = icmp eq %2* null, null
+ br i1 %a8, label %block_34, label %block_9
+
+block_9: ; preds = %block_7
+ %a10 = icmp eq i8* %p1, null
+ br i1 %a10, label %block_11, label %block_32
+
+block_11: ; preds = %block_9
+ %a12 = load %2*, %2** %p2, align 8
+ %a13 = icmp eq %2* %a12, null
+ br i1 %a13, label %block_34, label %block_14
+
+block_14: ; preds = %block_11
+ %a15 = getelementptr inbounds %2, %2* %a12, i64 0, i32 1
+ %a16 = load i8*, i8** %a15, align 8
+ %a17 = icmp eq i8* %a16, null
+ br i1 %a17, label %block_18, label %block_32
+
+block_18: ; preds = %block_14
+ %a19 = load %2*, %2** %p2, align 8
+ %a20 = icmp eq %2* %a19, null
+ br i1 %a20, label %block_34, label %block_21
+
+block_21: ; preds = %block_18
+ %a22 = getelementptr inbounds %2, %2* %a19, i64 0, i32 1
+ %a23 = load i8*, i8** %a22, align 8
+ %a24 = icmp eq i8* %a23, null
+ br i1 %a24, label %block_25, label %block_32
+
+block_25: ; preds = %block_28, %block_21
+ %a26 = load %2*, %2** %p2, align 8
+ %a27 = icmp eq %2* %a26, null
+ br i1 %a27, label %block_34, label %block_28
+
+block_28: ; preds = %block_25
+ %a29 = getelementptr inbounds %2, %2* %a26, i64 0, i32 1
+ %a30 = load i8*, i8** %a29, align 8
+ %a31 = icmp eq i8* %a30, null
+ br i1 %a31, label %block_25, label %block_32
+
+block_32: ; preds = %block_28, %block_21, %block_14, %block_9
+ %a33 = tail call i32 (...) @Up()
+ br label %block_34
+
+block_34: ; preds = %block_32, %block_25, %block_18, %block_11, %block_7, %block_4, %2
+ %a35 = phi i32 [ 0, %2 ], [ %a5, %block_4 ], [ 0, %block_7 ], [ 0, %block_11 ], [ 0, %block_32 ], [ 0, %block_18 ], [ 0, %block_25 ]
+ ret i32 %a35
+}
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index 5779cf33ac84..2944b17c6c16 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -44,3 +44,60 @@ entry:
%0 = shl i64 %x, 1
ret i64 %0
}
+
+@global = external global i32, align 4
+@global2 = external global i64, align 8
+
+; Test that liveness is properly updated and we do not encounter the
+; assert/crash from http://llvm.org/PR28301
+; CHECK-LABEL: ham
+define void @ham() {
+bb:
+ br label %bb1
+
+bb1:
+ %tmp = phi i64 [ %tmp40, %bb9 ], [ 0, %bb ]
+ %tmp2 = phi i32 [ %tmp39, %bb9 ], [ 0, %bb ]
+ %tmp3 = icmp sgt i32 undef, 10
+ br i1 %tmp3, label %bb2, label %bb3
+
+bb2:
+ %tmp6 = load i32, i32* @global, align 4
+ %tmp8 = add nsw i32 %tmp6, %tmp2
+ %tmp9 = sext i32 %tmp8 to i64
+ br label %bb6
+
+bb3:
+; CHECK: subl %e[[REG0:[a-z0-9]+]],
+; CHECK: leaq 4({{%[a-z0-9]+}}), %r[[REG0]]
+ %tmp14 = phi i64 [ %tmp15, %bb5 ], [ 0, %bb1 ]
+ %tmp15 = add nuw i64 %tmp14, 4
+ %tmp16 = trunc i64 %tmp14 to i32
+ %tmp17 = sub i32 %tmp2, %tmp16
+ br label %bb4
+
+bb4:
+ %tmp20 = phi i64 [ %tmp14, %bb3 ], [ %tmp34, %bb5 ]
+ %tmp28 = icmp eq i32 %tmp17, 0
+ br i1 %tmp28, label %bb5, label %bb8
+
+bb5:
+ %tmp34 = add nuw nsw i64 %tmp20, 1
+ %tmp35 = icmp slt i64 %tmp34, %tmp15
+ br i1 %tmp35, label %bb4, label %bb3
+
+bb6:
+ store volatile i64 %tmp, i64* @global2, align 8
+ store volatile i64 %tmp9, i64* @global2, align 8
+ store volatile i32 %tmp6, i32* @global, align 4
+ %tmp45 = icmp slt i32 undef, undef
+ br i1 %tmp45, label %bb6, label %bb9
+
+bb8:
+ unreachable
+
+bb9:
+ %tmp39 = add nuw nsw i32 %tmp2, 4
+ %tmp40 = add nuw i64 %tmp, 4
+ br label %bb1
+}