diff options
Diffstat (limited to 'test/CodeGen')
58 files changed, 1940 insertions, 567 deletions
diff --git a/test/CodeGen/AArch64/aarch64-vcvtfp2fxs-combine.ll b/test/CodeGen/AArch64/aarch64-vcvtfp2fxs-combine.ll new file mode 100644 index 000000000000..a71b5e86138d --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-vcvtfp2fxs-combine.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=aarch64-linux-eabi -o - | FileCheck %s + +%struct.a= type { i64, i64, i64, i64 } + +; DAG combine will try to perform a transformation that creates a vcvtfp2fxs +; with a v4f64 input. Since v4i64 is not legal we should bail out. We can +; pottentially still create the vcvtfp2fxs node after legalization (but on a +; v2f64). + +; CHECK-LABEL: fun1 +define void @fun1() local_unnamed_addr { +entry: + %mul = fmul <4 x double> zeroinitializer, <double 6.553600e+04, double 6.553600e+04, double 6.553600e+04, double 6.553600e+04> + %toi = fptosi <4 x double> %mul to <4 x i64> + %ptr = getelementptr inbounds %struct.a, %struct.a* undef, i64 0, i32 2 + %elem = extractelement <4 x i64> %toi, i32 1 + store i64 %elem, i64* %ptr, align 8 + call void @llvm.trap() + unreachable +} + +; Function Attrs: noreturn nounwind +declare void @llvm.trap() + diff --git a/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll b/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll index a12132f425d9..d78c75165be2 100644 --- a/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll +++ b/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll @@ -1,8 +1,246 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare < %s | FileCheck %s -; RUN: opt -S -amdgpu-codegenprepare < %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s +; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s ; Make sure this doesn't crash with no triple -; CHECK-LABEL: @foo( -define void @foo() { +; NOOP-LABEL: @noop_fdiv_fpmath( +; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0 +define void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 { + %md.25ulp = fdiv float %a, %b, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out ret void } + +; CHECK-LABEL: @fdiv_fpmath( +; CHECK: %no.md = fdiv float %a, %b{{$}} +; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1 +; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2 +; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3 +; CHECK: %fast.md.25ulp = call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +; CHECK: arcp.md.25ulp = call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +define void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 { + %no.md = fdiv float %a, %b + store volatile float %no.md, float addrspace(1)* %out + + %md.half.ulp = fdiv float %a, %b, !fpmath !1 + store volatile float %md.half.ulp, float addrspace(1)* %out + + %md.1ulp = fdiv float %a, %b, !fpmath !2 + store volatile float %md.1ulp, float addrspace(1)* %out + + %md.25ulp = fdiv float %a, %b, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out + + %md.3ulp = fdiv float %a, %b, !fpmath !3 + store volatile float %md.3ulp, float addrspace(1)* %out + + %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 + store volatile float %fast.md.25ulp, float addrspace(1)* %out + + %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 + store volatile float %arcp.md.25ulp, float addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @rcp_fdiv_fpmath( +; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}} +; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0 +; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1 +; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}} +; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0 +; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}} +; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0 +define void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 { + %no.md = fdiv float 1.0, %x + store volatile float %no.md, float addrspace(1)* %out + + %md.25ulp = fdiv float 1.0, %x, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out + + %md.half.ulp = fdiv float 1.0, %x, !fpmath !1 + store volatile float %md.half.ulp, float addrspace(1)* %out + + %arcp.no.md = fdiv arcp float 1.0, %x + store volatile float %arcp.no.md, float addrspace(1)* %out + + %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0 + store volatile float %arcp.25ulp, float addrspace(1)* %out + + %fast.no.md = fdiv fast float 1.0, %x + store volatile float %fast.no.md, float addrspace(1)* %out + + %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0 + store volatile float %fast.25ulp, float addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @fdiv_fpmath_vector( +; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}} +; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 +; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 + +; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0 +; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0 +; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0 +; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0 +; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1 +; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1 +; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0 +; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1 +define void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 { + %no.md = fdiv <2 x float> %a, %b + store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out + + %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 + store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out + + %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 + store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out + + %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0 + store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @rcp_fdiv_fpmath_vector( +; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} +; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1 +; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} +; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} + +; CHECK: extractelement <2 x float> %x +; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: extractelement <2 x float> %x +; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: store volatile <2 x float> %arcp.25ulp + +; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out +define void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 { + %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x + store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out + + %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1 + store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out + + %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x + store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out + + %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x + store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out + + %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0 + store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out + + %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0 + store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat( +; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x +; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x +; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}} + +; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0 +; CHECK: fdiv arcp float 1.000000e+00, %[[X0]], !fpmath !0 +; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1 +; CHECK: fdiv arcp float 2.000000e+00, %[[X1]], !fpmath !0 +; CHECK: store volatile <2 x float> %arcp.25ulp + +; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0 +; CHECK: fdiv fast float 1.000000e+00, %[[X0]], !fpmath !0 +; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1 +; CHECK: fdiv fast float 2.000000e+00, %[[X1]], !fpmath !0 +; CHECK: store volatile <2 x float> %fast.25ulp +define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 { + %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x + store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out + + %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x + store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out + + %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x + store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out + + %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0 + store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out + + %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0 + store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; FIXME: Should be able to get fdiv for 1.0 component +; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant( +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: store volatile <2 x float> %arcp.25ulp + +; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: store volatile <2 x float> %fast.25ulp +define void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 { + %x.insert = insertelement <2 x float> %x, float 1.0, i32 0 + + %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0 + store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out + + %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0 + store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @fdiv_fpmath_f32_denormals( +; CHECK: %no.md = fdiv float %a, %b{{$}} +; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1 +; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2 +; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0 +; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3 +; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +define void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 { + %no.md = fdiv float %a, %b + store volatile float %no.md, float addrspace(1)* %out + + %md.half.ulp = fdiv float %a, %b, !fpmath !1 + store volatile float %md.half.ulp, float addrspace(1)* %out + + %md.1ulp = fdiv float %a, %b, !fpmath !2 + store volatile float %md.1ulp, float addrspace(1)* %out + + %md.25ulp = fdiv float %a, %b, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out + + %md.3ulp = fdiv float %a, %b, !fpmath !3 + store volatile float %md.3ulp, float addrspace(1)* %out + + %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 + store volatile float %fast.md.25ulp, float addrspace(1)* %out + + %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 + store volatile float %arcp.md.25ulp, float addrspace(1)* %out + + ret void +} + +attributes #0 = { nounwind optnone noinline } +attributes #1 = { nounwind } +attributes #2 = { nounwind "target-features"="+fp32-denormals" } + +; CHECK: !0 = !{float 2.500000e+00} +; CHECK: !1 = !{float 5.000000e-01} +; CHECK: !2 = !{float 1.000000e+00} +; CHECK: !3 = !{float 3.000000e+00} + +!0 = !{float 2.500000e+00} +!1 = !{float 5.000000e-01} +!2 = !{float 1.000000e+00} +!3 = !{float 3.000000e+00} diff --git a/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/test/CodeGen/AMDGPU/amdgpu.private-memory.ll index 7b5158629091..bd0817d30413 100644 --- a/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -417,12 +417,6 @@ entry: ret void } -; HSAOPT: !0 = !{} -; HSAOPT: !1 = !{i32 0, i32 2048} - -; NOHSAOPT: !0 = !{i32 0, i32 2048} - - ; FUNC-LABEL: v16i32_stack: ; R600: MOVA_INT @@ -527,4 +521,33 @@ define void @v2float_stack(<2 x float> addrspace(1)* %out, i32 %a) { ret void } +; OPT-LABEL: @direct_alloca_read_0xi32( +; OPT: store [0 x i32] undef, [0 x i32] addrspace(3)* +; OPT: load [0 x i32], [0 x i32] addrspace(3)* +define void @direct_alloca_read_0xi32([0 x i32] addrspace(1)* %out, i32 %index) { +entry: + %tmp = alloca [0 x i32] + store [0 x i32] [], [0 x i32]* %tmp + %load = load [0 x i32], [0 x i32]* %tmp + store [0 x i32] %load, [0 x i32] addrspace(1)* %out + ret void +} + +; OPT-LABEL: @direct_alloca_read_1xi32( +; OPT: store [1 x i32] zeroinitializer, [1 x i32] addrspace(3)* +; OPT: load [1 x i32], [1 x i32] addrspace(3)* +define void @direct_alloca_read_1xi32([1 x i32] addrspace(1)* %out, i32 %index) { +entry: + %tmp = alloca [1 x i32] + store [1 x i32] [i32 0], [1 x i32]* %tmp + %load = load [1 x i32], [1 x i32]* %tmp + store [1 x i32] %load, [1 x i32] addrspace(1)* %out + ret void +} + attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="2" } + +; HSAOPT: !0 = !{} +; HSAOPT: !1 = !{i32 0, i32 2048} + +; NOHSAOPT: !0 = !{i32 0, i32 2048} diff --git a/test/CodeGen/AMDGPU/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll index ff730a085255..00636240bc6c 100644 --- a/test/CodeGen/AMDGPU/basic-branch.ll +++ b/test/CodeGen/AMDGPU/basic-branch.ll @@ -6,7 +6,6 @@ ; GCN-LABEL: {{^}}test_branch: ; GCNNOOPT: v_writelane_b32 ; GCNNOOPT: v_writelane_b32 -; GCNNOOPT: v_writelane_b32 ; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]] ; GCN: ; BB#1 diff --git a/test/CodeGen/AMDGPU/fdiv.ll b/test/CodeGen/AMDGPU/fdiv.ll index 4021233e7785..65464cdba604 100644 --- a/test/CodeGen/AMDGPU/fdiv.ll +++ b/test/CodeGen/AMDGPU/fdiv.ll @@ -1,8 +1,4 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-fast-fdiv < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=UNSAFE-FP -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; These tests check that fdiv is expanded correctly and also test that the @@ -15,22 +11,59 @@ ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 +; SI: v_div_scale_f32 +; SI-DAG: v_div_scale_f32 ; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 +; SI: v_fma_f32 +; SI: v_fma_f32 +; SI: v_mul_f32 +; SI: v_fma_f32 +; SI: v_fma_f32 +; SI: v_fma_f32 +; SI: v_div_fmas_f32 +; SI: v_div_fixup_f32 +define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 { +entry: + %fdiv = fdiv float %a, %b + store float %fdiv, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fdiv_25ulp_f32: +; SI: v_cndmask_b32 +; SI: v_mul_f32 +; SI: v_rcp_f32 +; SI: v_mul_f32 +; SI: v_mul_f32 +define void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 { +entry: + %fdiv = fdiv float %a, %b, !fpmath !0 + store float %fdiv, float addrspace(1)* %out + ret void +} + +; Use correct fdiv +; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32: +; SI: v_fma_f32 +; SI: v_div_fmas_f32 +; SI: v_div_fixup_f32 +define void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 { +entry: + %fdiv = fdiv float %a, %b, !fpmath !0 + store float %fdiv, float addrspace(1)* %out + ret void +} -; I754-DAG: v_div_scale_f32 -; I754-DAG: v_rcp_f32 -; I754-DAG: v_fma_f32 -; I754-DAG: v_mul_f32 -; I754-DAG: v_fma_f32 -; I754-DAG: v_div_fixup_f32 -define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) { +; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32: +; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} +; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] +; SI-NOT: [[RESULT]] +; SI: buffer_store_dword [[RESULT]] +define void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 { entry: - %0 = fdiv float %a, %b - store float %0, float addrspace(1)* %out + %fdiv = fdiv fast float %a, %b + store float %fdiv, float addrspace(1)* %out ret void } @@ -38,15 +71,14 @@ entry: ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) { +; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} +; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] +; SI-NOT: [[RESULT]] +; SI: buffer_store_dword [[RESULT]] +define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 { entry: - %0 = fdiv fast float %a, %b - store float %0, float addrspace(1)* %out + %fdiv = fdiv fast float %a, %b + store float %fdiv, float addrspace(1)* %out ret void } @@ -54,15 +86,14 @@ entry: ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) { +; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} +; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] +; SI-NOT: [[RESULT]] +; SI: buffer_store_dword [[RESULT]] +define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 { entry: - %0 = fdiv arcp float %a, %b - store float %0, float addrspace(1)* %out + %fdiv = fdiv arcp float %a, %b + store float %fdiv, float addrspace(1)* %out ret void } @@ -72,26 +103,24 @@ entry: ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 +; SI: v_div_scale_f32 +; SI: v_div_scale_f32 +; SI: v_div_scale_f32 +; SI: v_div_scale_f32 +define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { +entry: + %fdiv = fdiv <2 x float> %a, %b + store <2 x float> %fdiv, <2 x float> addrspace(1)* %out + ret void +} -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_fixup_f32 -; I754: v_div_fixup_f32 -define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { +; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32: +; SI: v_cmp_gt_f32 +; SI: v_cmp_gt_f32 +define void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { entry: - %0 = fdiv <2 x float> %a, %b - store <2 x float> %0, <2 x float> addrspace(1)* %out + %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0 + store <2 x float> %fdiv, <2 x float> addrspace(1)* %out ret void } @@ -101,19 +130,12 @@ entry: ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { +; SI: v_rcp_f32 +; SI: v_rcp_f32 +define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { entry: - %0 = fdiv fast <2 x float> %a, %b - store <2 x float> %0, <2 x float> addrspace(1)* %out + %fdiv = fdiv fast <2 x float> %a, %b + store <2 x float> %fdiv, <2 x float> addrspace(1)* %out ret void } @@ -123,19 +145,12 @@ entry: ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { +; SI: v_rcp_f32 +; SI: v_rcp_f32 +define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { entry: - %0 = fdiv arcp <2 x float> %a, %b - store <2 x float> %0, <2 x float> addrspace(1)* %out + %fdiv = fdiv arcp <2 x float> %a, %b + store <2 x float> %fdiv, <2 x float> addrspace(1)* %out ret void } @@ -149,37 +164,11 @@ entry: ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 - -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_fixup_f32 -; I754: v_div_fixup_f32 -; I754: v_div_fixup_f32 -; I754: v_div_fixup_f32 -define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +; SI: v_div_fixup_f32 +; SI: v_div_fixup_f32 +; SI: v_div_fixup_f32 +; SI: v_div_fixup_f32 +define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float>, <4 x float> addrspace(1) * %in %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr @@ -198,24 +187,11 @@ define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1) ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +; SI: v_rcp_f32 +; SI: v_rcp_f32 +; SI: v_rcp_f32 +; SI: v_rcp_f32 +define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float>, <4 x float> addrspace(1) * %in %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr @@ -234,24 +210,11 @@ define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> ad ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +; SI: v_rcp_f32 +; SI: v_rcp_f32 +; SI: v_rcp_f32 +; SI: v_rcp_f32 +define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float>, <4 x float> addrspace(1) * %in %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr @@ -259,3 +222,9 @@ define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> ad store <4 x float> %result, <4 x float> addrspace(1)* %out ret void } + +attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals" } +attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals" } +attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals" } + +!0 = !{float 2.500000e+00} diff --git a/test/CodeGen/AMDGPU/fp_to_sint.f64.ll b/test/CodeGen/AMDGPU/fp_to_sint.f64.ll index be23e10d7087..1537d67cadcc 100644 --- a/test/CodeGen/AMDGPU/fp_to_sint.f64.ll +++ b/test/CodeGen/AMDGPU/fp_to_sint.f64.ll @@ -1,7 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #1 +declare double @llvm.fabs.f64(double) #1 ; FUNC-LABEL: @fp_to_sint_f64_i32 ; SI: v_cvt_i32_f64_e32 @@ -54,3 +55,23 @@ define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in store i64 %cast, i64 addrspace(1)* %out, align 8 ret void } + +; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}} +define void @fp_to_sint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %conv = fptosi double %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}| +define void @fp_to_sint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %in.fabs = call double @llvm.fabs.f64(double %in) + %conv = fptosi double %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/fp_to_sint.ll b/test/CodeGen/AMDGPU/fp_to_sint.ll index b39aeadc8cce..0cd0358bafd5 100644 --- a/test/CodeGen/AMDGPU/fp_to_sint.ll +++ b/test/CodeGen/AMDGPU/fp_to_sint.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC -declare float @llvm.fabs.f32(float) #0 +declare float @llvm.fabs.f32(float) #1 ; FUNC-LABEL: {{^}}fp_to_sint_i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -17,7 +17,7 @@ define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { ; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs: ; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { - %in.fabs = call float @llvm.fabs.f32(float %in) #0 + %in.fabs = call float @llvm.fabs.f32(float %in) %conv = fptosi float %in.fabs to i32 store i32 %conv, i32 addrspace(1)* %out ret void @@ -227,4 +227,26 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { ret void } -attributes #0 = { nounwind readnone } +; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}} + +; EG: AND_INT +; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, literal.y, +; EG-NEXT: -1082130432(-1.000000e+00) +define void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %conv = fptosi float %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{[0-9]+}}| +define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %in.fabs = call float @llvm.fabs.f32(float %in) + %conv = fptosi float %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/fp_to_uint.f64.ll b/test/CodeGen/AMDGPU/fp_to_uint.f64.ll index 760019ebdc08..d5bc416434df 100644 --- a/test/CodeGen/AMDGPU/fp_to_uint.f64.ll +++ b/test/CodeGen/AMDGPU/fp_to_uint.f64.ll @@ -1,7 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #1 +declare double @llvm.fabs.f64(double) #1 ; SI-LABEL: {{^}}fp_to_uint_i32_f64: ; SI: v_cvt_u32_f64_e32 @@ -68,3 +69,23 @@ define void @fp_to_uint_v4i64_v4f64(<4 x i64> addrspace(1)* %out, <4 x double> % store <4 x i64> %cast, <4 x i64> addrspace(1)* %out, align 32 ret void } + +; FUNC-LABEL: {{^}}fp_to_uint_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{\[[0-9]+:[0-9]+\]}} +define void @fp_to_uint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %conv = fptoui double %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{\[[0-9]+:[0-9]+\]}}| +define void @fp_to_uint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %in.fabs = call double @llvm.fabs.f64(double %in) + %conv = fptoui double %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/fp_to_uint.ll b/test/CodeGen/AMDGPU/fp_to_uint.ll index b7b6ccc238b3..8a0f9fa2ac2b 100644 --- a/test/CodeGen/AMDGPU/fp_to_uint.ll +++ b/test/CodeGen/AMDGPU/fp_to_uint.ll @@ -1,6 +1,8 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC + +declare float @llvm.fabs.f32(float) #1 ; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32: ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -215,3 +217,27 @@ define void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> store <4 x i64> %conv, <4 x i64> addrspace(1)* %out ret void } + + +; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}} + +; EG: AND_INT +; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, 1.0, +define void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %conv = fptoui float %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{[0-9]+}}| +define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %in.fabs = call float @llvm.fabs.f32(float %in) + %conv = fptoui float %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll new file mode 100644 index 000000000000..4e17a921d91b --- /dev/null +++ b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll @@ -0,0 +1,8 @@ +; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s +; check llc does not crash for invalid opencl version metadata + +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .short 256 + +!opencl.ocl.version = !{} diff --git a/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll new file mode 100644 index 000000000000..35b7d70596c1 --- /dev/null +++ b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s +; check llc does not crash for invalid opencl version metadata + +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .short 256 + +!opencl.ocl.version = !{!0} +!0 = !{} diff --git a/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll new file mode 100644 index 000000000000..e1693551b621 --- /dev/null +++ b/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s +; check llc does not crash for invalid opencl version metadata + +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .short 256 + +!opencl.ocl.version = !{!0} +!0 = !{i32 1} diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll new file mode 100644 index 000000000000..54d7848da3bf --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +declare float @llvm.amdgcn.fdiv.fast(float, float) #0 + +; CHECK-LABEL: {{^}}test_fdiv_fast: +; CHECK: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc +; CHECK: v_mul_f32_e32 +; CHECK: v_rcp_f32_e32 +; CHECK: v_mul_f32_e32 +; CHECK: v_mul_f32_e32 +define void @test_fdiv_fast(float addrspace(1)* %out, float %a, float %b) #1 { + %fdiv = call float @llvm.amdgcn.fdiv.fast(float %a, float %b) + store float %fdiv, float addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticgroup.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll index cf6d1ab237cd..6014e2ed85f8 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticgroup.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll @@ -2,13 +2,14 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s - @lds0 = addrspace(3) global [512 x float] undef, align 4 @lds1 = addrspace(3) global [256 x float] undef, align 4 -; FUNC-LABEL: {{^}}groupstaticsize_test0: -; CHECK: s_movk_i32 s{{[0-9]+}}, 0x800 -define void @get_groupstaticsize_test0(float addrspace(1)* %out, i32 addrspace(1)* %lds_size) #0 { +@large = addrspace(3) global [4096 x i32] undef, align 4 + +; CHECK-LABEL: {{^}}groupstaticsize_test0: +; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0x800{{$}} +define void @groupstaticsize_test0(float addrspace(1)* %out, i32 addrspace(1)* %lds_size) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 64 %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1 @@ -20,9 +21,8 @@ define void @get_groupstaticsize_test0(float addrspace(1)* %out, i32 addrspace(1 ret void } - -; FUNC-LABEL: {{^}}groupstaticsize_test1: -; CHECK: s_movk_i32 s{{[0-9]+}}, 0xc00 +; CHECK-LABEL: {{^}}groupstaticsize_test1: +; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xc00{{$}} define void @groupstaticsize_test1(float addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %lds_size) { entry: %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1 @@ -48,6 +48,16 @@ endif: ; preds = %else, %if ret void } +; Exceeds 16-bit simm limit of s_movk_i32 +; CHECK-LABEL: {{^}}large_groupstaticsize: +; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}} +define void @large_groupstaticsize(i32 addrspace(1)* %size, i32 %idx) #0 { + %gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(3)* @large, i32 0, i32 %idx + store volatile i32 0, i32 addrspace(3)* %gep + %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() + store i32 %static_lds_size, i32 addrspace(1)* %size + ret void +} declare i32 @llvm.amdgcn.groupstaticsize() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/test/CodeGen/AMDGPU/rcp-pattern.ll b/test/CodeGen/AMDGPU/rcp-pattern.ll index b1d422062543..27a88f7b59e7 100644 --- a/test/CodeGen/AMDGPU/rcp-pattern.ll +++ b/test/CodeGen/AMDGPU/rcp-pattern.ll @@ -1,11 +1,96 @@ -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; FIXME: Evergreen only ever does unsafe fp math. ; FUNC-LABEL: {{^}}rcp_pat_f32: +; GCN: s_load_dword [[SRC:s[0-9]+]] +; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]] +; GCN: buffer_store_dword [[RCP]] + ; EG: RECIP_IEEE -define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { +define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 { %rcp = fdiv float 1.0, %src store float %rcp, float addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: {{^}}rcp_ulp25_pat_f32: +; GCN: s_load_dword [[SRC:s[0-9]+]] +; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]] +; GCN: buffer_store_dword [[RCP]] + +; EG: RECIP_IEEE +define void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 { + %rcp = fdiv float 1.0, %src, !fpmath !0 + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}rcp_fast_ulp25_pat_f32: +; GCN: s_load_dword [[SRC:s[0-9]+]] +; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]] +; GCN: buffer_store_dword [[RCP]] + +; EG: RECIP_IEEE +define void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 { + %rcp = fdiv fast float 1.0, %src, !fpmath !0 + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}rcp_arcp_ulp25_pat_f32: +; GCN: s_load_dword [[SRC:s[0-9]+]] +; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]] +; GCN: buffer_store_dword [[RCP]] + +; EG: RECIP_IEEE +define void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 { + %rcp = fdiv arcp float 1.0, %src, !fpmath !0 + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}rcp_global_fast_ulp25_pat_f32: +; GCN: s_load_dword [[SRC:s[0-9]+]] +; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]] +; GCN: buffer_store_dword [[RCP]] + +; EG: RECIP_IEEE +define void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 { + %rcp = fdiv float 1.0, %src, !fpmath !0 + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}rcp_fabs_pat_f32: +; GCN: s_load_dword [[SRC:s[0-9]+]] +; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], |[[SRC]]| +; GCN: buffer_store_dword [[RCP]] + +; EG: RECIP_IEEE +define void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 { + %src.fabs = call float @llvm.fabs.f32(float %src) + %rcp = fdiv float 1.0, %src.fabs + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FIXME: fneg folded into constant 1 +; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_f32: +define void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 { + %src.fabs = call float @llvm.fabs.f32(float %src) + %src.fabs.fneg = fsub float -0.0, %src.fabs + %rcp = fdiv float 1.0, %src.fabs.fneg + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + + +declare float @llvm.fabs.f32(float) #1 + +attributes #0 = { nounwind "unsafe-fp-math"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind "unsafe-fp-math"="true" } + +!0 = !{float 2.500000e+00} diff --git a/test/CodeGen/AMDGPU/reciprocal.ll b/test/CodeGen/AMDGPU/reciprocal.ll deleted file mode 100644 index f9292a788521..000000000000 --- a/test/CodeGen/AMDGPU/reciprocal.ll +++ /dev/null @@ -1,13 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define amdgpu_ps void @test(<4 x float> inreg %reg0) { - %r0 = extractelement <4 x float> %reg0, i32 0 - %r1 = fdiv float 1.0, %r0 - %vec = insertelement <4 x float> undef, float %r1, i32 0 - call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0) - ret void -} - -declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll index 10187f6125d6..4ba4ac76a280 100644 --- a/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -348,7 +348,6 @@ bb7: ; preds = %bb4 ; CHECK: image_sample_c ; CHECK: v_cmp_neq_f32_e32 vcc, 0, -; CHECK: s_and_b64 exec, exec, ; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc ; CHECK: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, exec ; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]] @@ -385,6 +384,7 @@ bb9: ; preds = %bb4 declare void @llvm.AMDGPU.kill(float) #0 declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind attributes #0 = { nounwind } attributes #1 = { nounwind readnone }
\ No newline at end of file diff --git a/test/CodeGen/AMDGPU/vector-alloca.ll b/test/CodeGen/AMDGPU/vector-alloca.ll index c151ca9ef9b4..7dcf36f144ac 100644 --- a/test/CodeGen/AMDGPU/vector-alloca.ll +++ b/test/CodeGen/AMDGPU/vector-alloca.ll @@ -3,6 +3,11 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s + +; OPT-LABEL: @vector_read( +; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index +; OPT: store i32 %0, i32 addrspace(1)* %out, align 4 ; FUNC-LABEL: {{^}}vector_read: ; EG: MOV @@ -12,21 +17,26 @@ ; EG: MOVA_INT define void @vector_read(i32 addrspace(1)* %out, i32 %index) { entry: - %0 = alloca [4 x i32] - %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0 - %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1 - %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2 - %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3 + %tmp = alloca [4 x i32] + %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 + %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 + %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 + %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 store i32 0, i32* %x store i32 1, i32* %y store i32 2, i32* %z store i32 3, i32* %w - %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %index - %2 = load i32, i32* %1 - store i32 %2, i32 addrspace(1)* %out + %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index + %tmp2 = load i32, i32* %tmp1 + store i32 %tmp2, i32 addrspace(1)* %out ret void } +; OPT-LABEL: @vector_write( +; OPT: %0 = insertelement <4 x i32> zeroinitializer, i32 1, i32 %w_index +; OPT: %1 = extractelement <4 x i32> %0, i32 %r_index +; OPT: store i32 %1, i32 addrspace(1)* %out, align 4 + ; FUNC-LABEL: {{^}}vector_write: ; EG: MOV ; EG: MOV @@ -36,42 +46,95 @@ entry: ; EG: MOVA_INT define void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { entry: - %0 = alloca [4 x i32] - %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0 - %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1 - %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2 - %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3 + %tmp = alloca [4 x i32] + %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 + %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 + %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 + %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 store i32 0, i32* %x store i32 0, i32* %y store i32 0, i32* %z store i32 0, i32* %w - %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %w_index - store i32 1, i32* %1 - %2 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %r_index - %3 = load i32, i32* %2 - store i32 %3, i32 addrspace(1)* %out + %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %w_index + store i32 1, i32* %tmp1 + %tmp2 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %r_index + %tmp3 = load i32, i32* %tmp2 + store i32 %tmp3, i32 addrspace(1)* %out ret void } ; This test should be optimize to: ; store i32 0, i32 addrspace(1)* %out + +; OPT-LABEL: @bitcast_gep( +; OPT-LABEL: store i32 0, i32 addrspace(1)* %out, align 4 + ; FUNC-LABEL: {{^}}bitcast_gep: ; EG: STORE_RAW define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { entry: - %0 = alloca [4 x i32] - %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0 - %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1 - %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2 - %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3 + %tmp = alloca [4 x i32] + %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 + %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 + %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 + %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 store i32 0, i32* %x store i32 0, i32* %y store i32 0, i32* %z store i32 0, i32* %w - %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1 - %2 = bitcast i32* %1 to [4 x i32]* - %3 = getelementptr [4 x i32], [4 x i32]* %2, i32 0, i32 0 - %4 = load i32, i32* %3 - store i32 %4, i32 addrspace(1)* %out + %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 + %tmp2 = bitcast i32* %tmp1 to [4 x i32]* + %tmp3 = getelementptr [4 x i32], [4 x i32]* %tmp2, i32 0, i32 0 + %tmp4 = load i32, i32* %tmp3 + store i32 %tmp4, i32 addrspace(1)* %out + ret void +} + +; OPT-LABEL: @vector_read_bitcast_gep( +; OPT: %0 = extractelement <4 x i32> <i32 1065353216, i32 1, i32 2, i32 3>, i32 %index +; OPT: store i32 %0, i32 addrspace(1)* %out, align 4 +define void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) { +entry: + %tmp = alloca [4 x i32] + %x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 + %y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 + %z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 + %w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 + %bc = bitcast i32* %x to float* + store float 1.0, float* %bc + store i32 1, i32* %y + store i32 2, i32* %z + store i32 3, i32* %w + %tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index + %tmp2 = load i32, i32* %tmp1 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FIXME: Should be able to promote this. Instcombine should fold the +; cast in the hasOneUse case so it might not matter in practice + +; OPT-LABEL: @vector_read_bitcast_alloca( +; OPT: alloca [4 x float] +; OPT: store float +; OPT: store float +; OPT: store float +; OPT: store float +; OPT: load float +define void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) { +entry: + %tmp = alloca [4 x i32] + %tmp.bc = bitcast [4 x i32]* %tmp to [4 x float]* + %x = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 0 + %y = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 1 + %z = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 2 + %w = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 3 + store float 0.0, float* %x + store float 1.0, float* %y + store float 2.0, float* %z + store float 4.0, float* %w + %tmp1 = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 %index + %tmp2 = load float, float* %tmp1 + store float %tmp2, float addrspace(1)* %out ret void } diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll index 23b0ffd5b3da..809a7ba9b826 100644 --- a/test/CodeGen/AMDGPU/wqm.ll +++ b/test/CodeGen/AMDGPU/wqm.ll @@ -41,14 +41,14 @@ main_body: ;CHECK: store ;CHECK-NOT: exec ;CHECK: .size test3 -define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) { +define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <4 x i32> %c) { main_body: %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %tex.1 = bitcast <4 x float> %tex to <4 x i32> %tex.2 = extractelement <4 x i32> %tex.1, i32 0 - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %tex.2 - %wr = extractelement <4 x float> %tex, i32 1 - store float %wr, float addrspace(1)* %gep + + call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %tex, <4 x i32> undef, i32 %tex.2, i32 0, i1 0, i1 0) + ret <4 x float> %tex } @@ -66,8 +66,9 @@ main_body: define amdgpu_ps <4 x float> @test4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %d, float %data) { main_body: %c.1 = mul i32 %c, %d - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.1 - store float %data, float addrspace(1)* %gep + + call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> undef, <4 x i32> undef, i32 %c.1, i32 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret <4 x float> %tex } @@ -89,7 +90,7 @@ main_body: ;CHECK: s_mov_b64 exec, [[SAVED]] ;CHECK: %IF ;CHECK: image_sample -define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %z, float %data) { +define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) { main_body: %cmp = icmp eq i32 %z, 0 br i1 %cmp, label %IF, label %ELSE @@ -100,8 +101,7 @@ IF: br label %END ELSE: - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c - store float %data, float addrspace(1)* %gep + call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0) br label %END END: @@ -129,7 +129,7 @@ END: ;CHECK: s_or_b64 exec, exec, ;CHECK: v_mov_b32_e32 v0 ;CHECK: ; return -define amdgpu_ps float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %z, float %data) { +define amdgpu_ps float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) { main_body: %cmp = icmp eq i32 %z, 0 br i1 %cmp, label %ELSE, label %IF @@ -140,8 +140,7 @@ IF: br label %END ELSE: - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c - store float %data, float addrspace(1)* %gep + call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0) br label %END END: @@ -163,23 +162,20 @@ END: ;CHECK: store ;CHECK: s_wqm_b64 exec, exec ;CHECK: v_cmp -define amdgpu_ps <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <3 x i32> %idx, <2 x float> %data, i32 %coord) { +define amdgpu_ps <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <3 x i32> %idx, <2 x float> %data, i32 %coord) { main_body: %idx.1 = extractelement <3 x i32> %idx, i32 0 - %gep.1 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.1 %data.1 = extractelement <2 x float> %data, i32 0 - store float %data.1, float addrspace(1)* %gep.1 + call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0) ; The load that determines the branch (and should therefore be WQM) is ; surrounded by stores that require disabled WQM. %idx.2 = extractelement <3 x i32> %idx, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.2 - %z = load float, float addrspace(1)* %gep.2 + %z = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx.2, i32 0, i1 0, i1 0) %idx.3 = extractelement <3 x i32> %idx, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.3 %data.3 = extractelement <2 x float> %data, i32 1 - store float %data.3, float addrspace(1)* %gep.3 + call void @llvm.amdgcn.buffer.store.f32(float %data.3, <4 x i32> undef, i32 %idx.3, i32 0, i1 0, i1 0) %cc = fcmp ogt float %z, 0.0 br i1 %cc, label %IF, label %ELSE @@ -210,24 +206,21 @@ END: ;CHECK: load ;CHECK: store ;CHECK: v_cmp -define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <3 x i32> %idx, <2 x float> %data, i32 %coord) { +define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <3 x i32> %idx, <2 x float> %data, i32 %coord) { main_body: %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %tex.1 = extractelement <4 x float> %tex, i32 0 %idx.1 = extractelement <3 x i32> %idx, i32 0 - %gep.1 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.1 %data.1 = extractelement <2 x float> %data, i32 0 - store float %data.1, float addrspace(1)* %gep.1 + call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0) %idx.2 = extractelement <3 x i32> %idx, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.2 - %z = load float, float addrspace(1)* %gep.2 + %z = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx.2, i32 0, i1 0, i1 0) %idx.3 = extractelement <3 x i32> %idx, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.3 %data.3 = extractelement <2 x float> %data, i32 1 - store float %data.3, float addrspace(1)* %gep.3 + call void @llvm.amdgcn.buffer.store.f32(float %data.3, <4 x i32> undef, i32 %idx.3, i32 0, i1 0, i1 0) %cc = fcmp ogt float %z, 0.0 br i1 %cc, label %IF, label %ELSE @@ -258,15 +251,14 @@ END: ;CHECK: s_mov_b64 exec, [[SAVE]] ;CHECK: %END ;CHECK: image_sample -define amdgpu_ps <4 x float> @test_control_flow_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %coord, i32 %y, float %z) { +define amdgpu_ps <4 x float> @test_control_flow_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %coord, i32 %y, float %z) { main_body: %cond = icmp eq i32 %y, 0 br i1 %cond, label %IF, label %END IF: - %data = load float, float addrspace(1)* %ptr - %gep = getelementptr float, float addrspace(1)* %ptr, i32 1 - store float %data, float addrspace(1)* %gep + %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 0, i32 0, i1 0, i1 0) + call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0) br label %END END: @@ -282,13 +274,11 @@ END: ;CHECK-NEXT: s_wqm_b64 exec, exec ;CHECK: image_sample ;CHECK: s_and_b64 exec, exec, [[ORIG]] -;SI: buffer_store_dword -;VI: flat_store_dword +;CHECK: buffer_store_dword ;CHECK: s_wqm_b64 exec, exec ;CHECK: v_cmpx_ ;CHECK: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[ORIG]] -;SI: buffer_store_dword -;VI: flat_store_dword +;CHECK: buffer_store_dword ;CHECK: s_mov_b64 exec, [[SAVE]] ;CHECK: image_sample define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, i32 %coord, i32 %coord2, float %z) { @@ -296,16 +286,14 @@ main_body: %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %idx.0 = extractelement <2 x i32> %idx, i32 0 - %gep.0 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.0 %data.0 = extractelement <2 x float> %data, i32 0 - store float %data.0, float addrspace(1)* %gep.0 + call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0) call void @llvm.AMDGPU.kill(float %z) %idx.1 = extractelement <2 x i32> %idx, i32 1 - %gep.1 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.1 %data.1 = extractelement <2 x float> %data, i32 1 - store float %data.1, float addrspace(1)* %gep.1 + call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0) %tex2 = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %out = fadd <4 x float> %tex, %tex2 @@ -321,16 +309,14 @@ main_body: ; CHECK: s_wqm_b64 exec, exec ; CHECK: image_sample ; CHECK: s_and_b64 exec, exec, [[ORIG]] -; SI: buffer_store_dword -; VI: flat_store_dword +; CHECK: buffer_store_dword ; CHECK-NOT: wqm ; CHECK: v_cmpx_ -define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %idx, float %data, i32 %coord, i32 %coord2, float %z) { +define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, i32 %coord, i32 %coord2, float %z) { main_body: %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %idx - store float %data, float addrspace(1)* %gep + call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0) call void @llvm.AMDGPU.kill(float %z) @@ -350,9 +336,91 @@ main_body: ret float %s } +; CHECK-LABEL: {{^}}test_loop_vcc: +; CHECK-NEXT: ; %entry +; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec +; CHECK: s_wqm_b64 exec, exec +; CHECK: s_and_b64 exec, exec, [[LIVE]] +; CHECK: image_store +; CHECK: s_wqm_b64 exec, exec +; CHECK: v_mov_b32_e32 [[CTR:v[0-9]+]], -2 +; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]] + +; CHECK: [[LOOPHDR]]: ; %loop +; CHECK: v_add_i32_e32 [[CTR]], vcc, 2, [[CTR]] +; CHECK: v_cmp_lt_i32_e32 vcc, 7, [[CTR]] +; CHECK: s_cbranch_vccz +; CHECK: ; %break + +; CHECK: ; return +define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind { +entry: + call void @llvm.amdgcn.image.store.v4i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0) + br label %loop + +loop: + %ctr.iv = phi i32 [ 0, %entry ], [ %ctr.next, %body ] + %c.iv = phi <4 x float> [ %in, %entry ], [ %c.next, %body ] + %cc = icmp sgt i32 %ctr.iv, 7 + br i1 %cc, label %break, label %body + +body: + %c.i = bitcast <4 x float> %c.iv to <4 x i32> + %c.next = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) + %ctr.next = add i32 %ctr.iv, 2 + br label %loop + +break: + ret <4 x float> %c.iv +} + +; Only intrinsic stores need exact execution -- other stores do not have +; externally visible effects and may require WQM for correctness. +; +; CHECK-LABEL: {{^}}test_alloca: +; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec +; CHECK: s_wqm_b64 exec, exec + +; CHECK: s_and_b64 exec, exec, [[LIVE]] +; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 +; CHECK: s_wqm_b64 exec, exec +; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen +; CHECK: s_and_b64 exec, exec, [[LIVE]] +; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen +; CHECK: s_wqm_b64 exec, exec +; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen + +; CHECK: image_sample +; CHECK: s_and_b64 exec, exec, [[LIVE]] +; CHECK: buffer_store_dwordx4 +define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind { +entry: + %array = alloca [32 x i32], align 4 + + call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0) + + %s.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 0 + store volatile i32 %a, i32* %s.gep, align 4 + + call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0) + + %c.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 %idx + %c = load i32, i32* %c.gep, align 4 + + %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) + + call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0) + + ret void +} + + declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1 +declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1 declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2 +declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2 declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3 declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3 diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 151cc1b12ed2..04eae8f9afec 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -49,7 +49,7 @@ tailrecurse.switch: ; preds = %tailrecurse ; V8-NEXT: beq ; V8-NEXT: %tailrecurse.switch ; V8: cmp -; V8-NEXT: beq +; V8-NEXT: bne ; V8-NEXT: b ; The trailing space in the last line checks that the branch is unconditional switch i32 %and, label %sw.epilog [ diff --git a/test/CodeGen/ARM/ssat-v4t.ll b/test/CodeGen/ARM/ssat-v4t.ll new file mode 100644 index 000000000000..3d74c88da827 --- /dev/null +++ b/test/CodeGen/ARM/ssat-v4t.ll @@ -0,0 +1,9 @@ +; RUN: not llc -O1 -mtriple=armv4t-none-none-eabi %s -o - 2>&1 | FileCheck %s + +; CHECK: Cannot select: intrinsic %llvm.arm.ssat +define i32 @ssat() nounwind { + %tmp = call i32 @llvm.arm.ssat(i32 128, i32 1) + ret i32 %tmp +} + +declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone diff --git a/test/CodeGen/ARM/ssat.ll b/test/CodeGen/ARM/ssat.ll index 2b75bc410aa8..f1e11dd33d1f 100644 --- a/test/CodeGen/ARM/ssat.ll +++ b/test/CodeGen/ARM/ssat.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=armv4t-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V4T +; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V6T2 ; Check for several conditions that should result in SSAT. ; For example, the base test is equivalent to @@ -16,7 +17,8 @@ ; 32-bit base test define i32 @sat_base_32bit(i32 %x) #0 { ; CHECK-LABEL: sat_base_32bit: -; CHECK: ssat r0, #24, r0 +; V6T2: ssat r0, #24, r0 +; V4T-NOT: ssat entry: %cmpLow = icmp slt i32 %x, -8388608 %cmpUp = icmp sgt i32 %x, 8388607 @@ -29,7 +31,8 @@ entry: ; 16-bit base test define i16 @sat_base_16bit(i16 %x) #0 { ; CHECK-LABEL: sat_base_16bit: -; CHECK: ssat r0, #12, r0 +; V6T2: ssat r0, #12, r0 +; V4T-NOT: ssat entry: %cmpLow = icmp slt i16 %x, -2048 %cmpUp = icmp sgt i16 %x, 2047 @@ -42,7 +45,8 @@ entry: ; 8-bit base test define i8 @sat_base_8bit(i8 %x) #0 { ; CHECK-LABEL: sat_base_8bit: -; CHECK: ssat r0, #6, r0 +; V6T2: ssat r0, #6, r0 +; V4T-NOT: ssat entry: %cmpLow = icmp slt i8 %x, -32 %cmpUp = icmp sgt i8 %x, 31 @@ -60,7 +64,8 @@ entry: ; x < -k ? -k : (x < k ? x : k) define i32 @sat_lower_upper_1(i32 %x) #0 { ; CHECK-LABEL: sat_lower_upper_1: -; CHECK: ssat r0, #24, r0 +; V6T2: ssat r0, #24, r0 +; V4T-NOT: ssat entry: %cmpLow = icmp slt i32 %x, -8388608 %cmpUp = icmp slt i32 %x, 8388607 @@ -72,7 +77,8 @@ entry: ; x > -k ? (x > k ? k : x) : -k define i32 @sat_lower_upper_2(i32 %x) #0 { ; CHECK-LABEL: sat_lower_upper_2: -; CHECK: ssat r0, #24, r0 +; V6T2: ssat r0, #24, r0 +; V4T-NOT: ssat entry: %cmpLow = icmp sgt i32 %x, -8388608 %cmpUp = icmp sgt i32 %x, 8388607 @@ -84,7 +90,8 @@ entry: ; x < k ? (x < -k ? -k : x) : k define i32 @sat_upper_lower_1(i32 %x) #0 { ; CHECK-LABEL: sat_upper_lower_1: -; CHECK: ssat r0, #24, r0 +; V6T2: ssat r0, #24, r0 +; V4T-NOT: ssat entry: %cmpUp = icmp slt i32 %x, 8388607 %cmpLow = icmp slt i32 %x, -8388608 @@ -96,7 +103,8 @@ entry: ; x > k ? k : (x < -k ? -k : x) define i32 @sat_upper_lower_2(i32 %x) #0 { ; CHECK-LABEL: sat_upper_lower_2: -; CHECK: ssat r0, #24, r0 +; V6T2: ssat r0, #24, r0 +; V4T-NOT: ssat entry: %cmpUp = icmp sgt i32 %x, 8388607 %cmpLow = icmp slt i32 %x, -8388608 @@ -108,7 +116,8 @@ entry: ; k < x ? k : (x > -k ? x : -k) define i32 @sat_upper_lower_3(i32 %x) #0 { ; CHECK-LABEL: sat_upper_lower_3: -; CHECK: ssat r0, #24, r0 +; V6T2: ssat r0, #24, r0 +; V4T-NOT: ssat entry: %cmpUp = icmp slt i32 8388607, %x %cmpLow = icmp sgt i32 %x, -8388608 @@ -125,7 +134,8 @@ entry: ; k <= x ? k : (x >= -k ? x : -k) define i32 @sat_le_ge(i32 %x) #0 { ; CHECK-LABEL: sat_le_ge: -; CHECK: ssat r0, #24, r0 +; V6T2: ssat r0, #24, r0 +; V4T-NOT: ssat entry: %cmpUp = icmp sle i32 8388607, %x %cmpLow = icmp sge i32 %x, -8388608 diff --git a/test/CodeGen/ARM/usat-v4t.ll b/test/CodeGen/ARM/usat-v4t.ll new file mode 100644 index 000000000000..572c760e3ae6 --- /dev/null +++ b/test/CodeGen/ARM/usat-v4t.ll @@ -0,0 +1,9 @@ +; RUN: not llc -O1 -mtriple=armv4t-none-none-eabi %s -o - 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: Cannot select: intrinsic %llvm.arm.usat +define i32 @usat1() nounwind { + %tmp = call i32 @llvm.arm.usat(i32 128, i32 31) + ret i32 %tmp +} + +declare i32 @llvm.arm.usat(i32, i32) nounwind readnone diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll index f736ddd0def6..c0229c626a0e 100644 --- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll +++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll @@ -11,13 +11,13 @@ entry: ; PIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]]) ; STATIC-O32: lui $[[R0:[0-9]+]], %hi($CPI0_0) ; STATIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]]) -; PIC-N32: lw $[[R0:[0-9]+]], %got_page($CPI0_0) -; PIC-N32: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]]) -; STATIC-N32: lui $[[R0:[0-9]+]], %hi($CPI0_0) -; STATIC-N32: lwc1 $f0, %lo($CPI0_0)($[[R0]]) -; PIC-N64: ld $[[R0:[0-9]+]], %got_page($CPI0_0) -; PIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]]) -; STATIC-N64: ld $[[R0:[0-9]+]], %got_page($CPI0_0) -; STATIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]]) +; PIC-N32: lw $[[R0:[0-9]+]], %got_page(.LCPI0_0) +; PIC-N32: lwc1 $f0, %got_ofst(.LCPI0_0)($[[R0]]) +; STATIC-N32: lui $[[R0:[0-9]+]], %hi(.LCPI0_0) +; STATIC-N32: lwc1 $f0, %lo(.LCPI0_0)($[[R0]]) +; PIC-N64: ld $[[R0:[0-9]+]], %got_page(.LCPI0_0) +; PIC-N64: lwc1 $f0, %got_ofst(.LCPI0_0)($[[R0]]) +; STATIC-N64: ld $[[R0:[0-9]+]], %got_page(.LCPI0_0) +; STATIC-N64: lwc1 $f0, %got_ofst(.LCPI0_0)($[[R0]]) ret float 0x400B333340000000 } diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll index 7d66d1a1a204..5f0a0a5a4929 100644 --- a/test/CodeGen/Mips/2010-07-20-Switch.ll +++ b/test/CodeGen/Mips/2010-07-20-Switch.ll @@ -27,9 +27,9 @@ entry: ; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]] ; PIC-O32: jr $[[R5]] ; N64: dsll $[[R0:[0-9]+]], ${{[0-9]+}}, 3 -; N64: ld $[[R1:[0-9]+]], %got_page($JTI0_0) +; N64: ld $[[R1:[0-9]+]], %got_page(.LJTI0_0) ; N64: daddu $[[R2:[0-9]+]], $[[R0:[0-9]+]], $[[R1]] -; N64: ld $[[R4:[0-9]+]], %got_ofst($JTI0_0)($[[R2]]) +; N64: ld $[[R4:[0-9]+]], %got_ofst(.LJTI0_0)($[[R2]]) ; N64: daddu $[[R5:[0-9]+]], $[[R4:[0-9]+]] ; N64: jr $[[R5]] switch i32 %0, label %bb4 [ @@ -68,7 +68,7 @@ bb5: ; preds = %entry ; PIC-O32: .gpword ; PIC-O32: .gpword ; N64: .p2align 3 -; N64: $JTI0_0: +; N64: .LJTI0_0: ; N64: .gpdword ; N64: .gpdword ; N64: .gpdword diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll index 377fe9327e0e..62150875e75b 100644 --- a/test/CodeGen/Mips/analyzebranch.ll +++ b/test/CodeGen/Mips/analyzebranch.ll @@ -10,7 +10,7 @@ define double @foo(double %a, double %b) nounwind readnone { entry: ; ALL-LABEL: foo: -; FCC: bc1f $BB +; FCC: bc1f {{\$|\.L}}BB ; FCC: nop ; 32-GPR: mtc1 $zero, $[[Z:f[0-9]]] @@ -19,7 +19,7 @@ entry: ; GPR: cmp.lt.d $[[FGRCC:f[0-9]+]], $[[Z]], $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] -; GPR: bnezc $[[GPRCC]], $BB +; GPR: bnezc $[[GPRCC]], {{\$|\.L}}BB %cmp = fcmp ogt double %a, 0.000000e+00 br i1 %cmp, label %if.end6, label %if.else @@ -43,7 +43,7 @@ define void @f1(float %f) nounwind { entry: ; ALL-LABEL: f1: -; FCC: bc1f $BB +; FCC: bc1f {{\$|\.L}}BB ; FCC: nop ; GPR: mtc1 $zero, $[[Z:f[0-9]]] diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll index 8f4ccb19958a..dfba8ba19331 100644 --- a/test/CodeGen/Mips/atomic.ll +++ b/test/CodeGen/Mips/atomic.ll @@ -34,17 +34,17 @@ entry: ; MIPS32-ANY: lw $[[R0:[0-9]+]], %got(x) ; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x)( -; O0: $[[BB0:[A-Z_0-9]+]]: +; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; O0: ld $[[R1:[0-9]+]] ; O0-NEXT: ll $[[R2:[0-9]+]], 0($[[R1]]) -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R3:[0-9]+]], 0($[[R0]]) ; ALL: addu $[[R4:[0-9]+]], $[[R3]], $4 ; ALL: sc $[[R4]], 0($[[R0]]) -; NOT-MICROMIPS: beqz $[[R4]], $[[BB0]] -; MICROMIPS: beqzc $[[R4]], $[[BB0]] -; MIPSR6: beqzc $[[R4]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R4]], [[BB0]] +; MICROMIPS: beqzc $[[R4]], [[BB0]] +; MIPSR6: beqzc $[[R4]], [[BB0]] } define i32 @AtomicLoadNand32(i32 signext %incr) nounwind { @@ -59,14 +59,14 @@ entry: -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R1:[0-9]+]], 0($[[R0]]) ; ALL: and $[[R3:[0-9]+]], $[[R1]], $4 ; ALL: nor $[[R2:[0-9]+]], $zero, $[[R3]] ; ALL: sc $[[R2]], 0($[[R0]]) -; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]] -; MICROMIPS: beqzc $[[R2]], $[[BB0]] -; MIPSR6: beqzc $[[R2]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R2]], [[BB0]] +; MICROMIPS: beqzc $[[R2]], [[BB0]] +; MIPSR6: beqzc $[[R2]], [[BB0]] } define i32 @AtomicSwap32(i32 signext %newval) nounwind { @@ -82,12 +82,12 @@ entry: ; MIPS32-ANY: lw $[[R0:[0-9]+]], %got(x) ; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x) -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll ${{[0-9]+}}, 0($[[R0]]) ; ALL: sc $[[R2:[0-9]+]], 0($[[R0]]) -; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]] -; MICROMIPS: beqzc $[[R2]], $[[BB0]] -; MIPSR6: beqzc $[[R2]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R2]], [[BB0]] +; MICROMIPS: beqzc $[[R2]], [[BB0]] +; MIPSR6: beqzc $[[R2]], [[BB0]] } define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { @@ -104,16 +104,16 @@ entry: ; MIPS32-ANY: lw $[[R0:[0-9]+]], %got(x) ; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x)( -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $2, 0($[[R0]]) -; NOT-MICROMIPS: bne $2, $4, $[[BB1:[A-Z_0-9]+]] -; MICROMIPS: bne $2, $4, $[[BB1:[A-Z_0-9]+]] -; MIPSR6: bnec $2, $4, $[[BB1:[A-Z_0-9]+]] +; NOT-MICROMIPS: bne $2, $4, [[BB1:(\$|\.L)[A-Z_0-9]+]] +; MICROMIPS: bne $2, $4, [[BB1:(\$|\.L)[A-Z_0-9]+]] +; MIPSR6: bnec $2, $4, [[BB1:(\$|\.L)[A-Z_0-9]+]] ; ALL: sc $[[R2:[0-9]+]], 0($[[R0]]) -; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]] -; MICROMIPS: beqzc $[[R2]], $[[BB0]] -; MIPSR6: beqzc $[[R2]], $[[BB0]] -; ALL: $[[BB1]]: +; NOT-MICROMIPS: beqz $[[R2]], [[BB0]] +; MICROMIPS: beqzc $[[R2]], [[BB0]] +; MIPSR6: beqzc $[[R2]], [[BB0]] +; ALL: [[BB1]]: } @@ -141,20 +141,20 @@ entry: ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]] ; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]] -; O0: $[[BB0:[A-Z_0-9]+]]: +; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; O0: ld $[[R10:[0-9]+]] ; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]]) -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R12:[0-9]+]], 0($[[R2]]) ; ALL: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]] ; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]] ; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]] ; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]] ; ALL: sc $[[R16]], 0($[[R2]]) -; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]] -; MICROMIPS: beqzc $[[R16]], $[[BB0]] -; MIPSR6: beqzc $[[R16]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R16]], [[BB0]] +; MICROMIPS: beqzc $[[R16]], [[BB0]] +; MIPSR6: beqzc $[[R16]], [[BB0]] ; ALL: and $[[R17:[0-9]+]], $[[R12]], $[[R7]] ; ALL: srlv $[[R18:[0-9]+]], $[[R17]], $[[R5]] @@ -186,20 +186,20 @@ entry: ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]] ; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]] -; O0: $[[BB0:[A-Z_0-9]+]]: +; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; O0: ld $[[R10:[0-9]+]] ; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]]) -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R12:[0-9]+]], 0($[[R2]]) ; ALL: subu $[[R13:[0-9]+]], $[[R12]], $[[R9]] ; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]] ; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]] ; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]] ; ALL: sc $[[R16]], 0($[[R2]]) -; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]] -; MICROMIPS: beqzc $[[R16]], $[[BB0]] -; MIPSR6: beqzc $[[R16]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R16]], [[BB0]] +; MICROMIPS: beqzc $[[R16]], [[BB0]] +; MIPSR6: beqzc $[[R16]], [[BB0]] ; ALL: and $[[R17:[0-9]+]], $[[R12]], $[[R7]] ; ALL: srlv $[[R18:[0-9]+]], $[[R17]], $[[R5]] @@ -231,11 +231,11 @@ entry: ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]] ; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]] -; O0: $[[BB0:[A-Z_0-9]+]]: +; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; O0: ld $[[R10:[0-9]+]] ; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]]) -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R12:[0-9]+]], 0($[[R2]]) ; ALL: and $[[R13:[0-9]+]], $[[R12]], $[[R9]] ; ALL: nor $[[R14:[0-9]+]], $zero, $[[R13]] @@ -243,9 +243,9 @@ entry: ; ALL: and $[[R16:[0-9]+]], $[[R12]], $[[R8]] ; ALL: or $[[R17:[0-9]+]], $[[R16]], $[[R15]] ; ALL: sc $[[R17]], 0($[[R2]]) -; NOT-MICROMIPS: beqz $[[R17]], $[[BB0]] -; MICROMIPS: beqzc $[[R17]], $[[BB0]] -; MIPSR6: beqzc $[[R17]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R17]], [[BB0]] +; MICROMIPS: beqzc $[[R17]], [[BB0]] +; MIPSR6: beqzc $[[R17]], [[BB0]] ; ALL: and $[[R18:[0-9]+]], $[[R12]], $[[R7]] ; ALL: srlv $[[R19:[0-9]+]], $[[R18]], $[[R5]] @@ -277,15 +277,15 @@ entry: ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]] ; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]] -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R10:[0-9]+]], 0($[[R2]]) ; ALL: and $[[R18:[0-9]+]], $[[R9]], $[[R7]] ; ALL: and $[[R13:[0-9]+]], $[[R10]], $[[R8]] ; ALL: or $[[R14:[0-9]+]], $[[R13]], $[[R18]] ; ALL: sc $[[R14]], 0($[[R2]]) -; NOT-MICROMIPS: beqz $[[R14]], $[[BB0]] -; MICROMIPS: beqzc $[[R14]], $[[BB0]] -; MIPSR6: beqzc $[[R14]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R14]], [[BB0]] +; MICROMIPS: beqzc $[[R14]], [[BB0]] +; MIPSR6: beqzc $[[R14]], [[BB0]] ; ALL: and $[[R15:[0-9]+]], $[[R10]], $[[R7]] ; ALL: srlv $[[R16:[0-9]+]], $[[R15]], $[[R5]] @@ -322,21 +322,21 @@ entry: ; ALL: andi $[[R11:[0-9]+]], $5, 255 ; ALL: sllv $[[R12:[0-9]+]], $[[R11]], $[[R5]] -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R13:[0-9]+]], 0($[[R2]]) ; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]] -; NOT-MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] -; MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] -; MIPSR6: bnec $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] +; NOT-MICROMIPS: bne $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]] +; MICROMIPS: bne $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]] +; MIPSR6: bnec $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]] ; ALL: and $[[R15:[0-9]+]], $[[R13]], $[[R8]] ; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R12]] ; ALL: sc $[[R16]], 0($[[R2]]) -; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]] -; MICROMIPS: beqzc $[[R16]], $[[BB0]] -; MIPSR6: beqzc $[[R16]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R16]], [[BB0]] +; MICROMIPS: beqzc $[[R16]], [[BB0]] +; MIPSR6: beqzc $[[R16]], [[BB0]] -; ALL: $[[BB1]]: +; ALL: [[BB1]]: ; ALL: srlv $[[R17:[0-9]+]], $[[R14]], $[[R5]] ; NO-SEB-SEH: sll $[[R18:[0-9]+]], $[[R17]], 24 @@ -366,21 +366,21 @@ entry: ; ALL: andi $[[R11:[0-9]+]], $6, 255 ; ALL: sllv $[[R12:[0-9]+]], $[[R11]], $[[R5]] -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R13:[0-9]+]], 0($[[R2]]) ; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]] -; NOT-MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] -; MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] -; MIPSR6: bnec $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] +; NOT-MICROMIPS: bne $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]] +; MICROMIPS: bne $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]] +; MIPSR6: bnec $[[R14]], $[[R10]], [[BB1:(\$|\.L)[A-Z_0-9]+]] ; ALL: and $[[R15:[0-9]+]], $[[R13]], $[[R8]] ; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R12]] ; ALL: sc $[[R16]], 0($[[R2]]) -; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]] -; MICROMIPS: beqzc $[[R16]], $[[BB0]] -; MIPSR6: beqzc $[[R16]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R16]], [[BB0]] +; MICROMIPS: beqzc $[[R16]], [[BB0]] +; MIPSR6: beqzc $[[R16]], [[BB0]] -; ALL: $[[BB1]]: +; ALL: [[BB1]]: ; ALL: srlv $[[R17:[0-9]+]], $[[R14]], $[[R5]] ; NO-SEB-SEH: sll $[[R18:[0-9]+]], $[[R17]], 24 @@ -423,20 +423,20 @@ entry: ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]] ; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]] -; O0: $[[BB0:[A-Z_0-9]+]]: +; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; O0: ld $[[R10:[0-9]+]] ; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]]) -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R12:[0-9]+]], 0($[[R2]]) ; ALL: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]] ; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]] ; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]] ; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]] ; ALL: sc $[[R16]], 0($[[R2]]) -; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]] -; MICROMIPS: beqzc $[[R16]], $[[BB0]] -; MIPSR6: beqzc $[[R16]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R16]], [[BB0]] +; MICROMIPS: beqzc $[[R16]], [[BB0]] +; MIPSR6: beqzc $[[R16]], [[BB0]] ; ALL: and $[[R17:[0-9]+]], $[[R12]], $[[R7]] ; ALL: srlv $[[R18:[0-9]+]], $[[R17]], $[[R5]] @@ -465,15 +465,15 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ; ALL: sync ; ALL: andi $[[R3:[0-9]+]], $[[R2]], 65535 -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R4:[0-9]+]], 0($[[R5:[0-9]+]]) ; ALL: and $[[R6:[0-9]+]], $[[R4]], $ ; ALL: and $[[R7:[0-9]+]], $[[R4]], $ ; ALL: or $[[R8:[0-9]+]], $[[R7]], $ ; ALL: sc $[[R8]], 0($[[R5]]) -; NOT-MICROMIPS: beqz $[[R8]], $[[BB0]] -; MICROMIPS: beqzc $[[R8]], $[[BB0]] -; MIPSR6: beqzc $[[R8]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R8]], [[BB0]] +; MICROMIPS: beqzc $[[R8]], [[BB0]] +; MIPSR6: beqzc $[[R8]], [[BB0]] ; ALL: srlv $[[R9:[0-9]+]], $[[R6]], $ @@ -538,11 +538,11 @@ entry: ; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x)( ; ALL: addiu $[[PTR:[0-9]+]], $[[R0]], 1024 -; ALL: $[[BB0:[A-Z_0-9]+]]: +; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]: ; ALL: ll $[[R1:[0-9]+]], 0($[[PTR]]) ; ALL: addu $[[R2:[0-9]+]], $[[R1]], $4 ; ALL: sc $[[R2]], 0($[[PTR]]) -; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]] -; MICROMIPS: beqzc $[[R2]], $[[BB0]] -; MIPSR6: beqzc $[[R2]], $[[BB0]] +; NOT-MICROMIPS: beqz $[[R2]], [[BB0]] +; MICROMIPS: beqzc $[[R2]], [[BB0]] +; MIPSR6: beqzc $[[R2]], [[BB0]] } diff --git a/test/CodeGen/Mips/blez_bgez.ll b/test/CodeGen/Mips/blez_bgez.ll index dcda047f8d09..84c8af45db81 100644 --- a/test/CodeGen/Mips/blez_bgez.ll +++ b/test/CodeGen/Mips/blez_bgez.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=mips64el < %s | FileCheck %s ; CHECK-LABEL: test_blez: -; CHECK: blez ${{[0-9]+}}, $BB +; CHECK: blez ${{[0-9]+}}, {{\$|\.L}}BB define void @test_blez(i32 %a) { entry: @@ -20,7 +20,7 @@ if.end: declare void @foo1() ; CHECK-LABEL: test_bgez: -; CHECK: bgez ${{[0-9]+}}, $BB +; CHECK: bgez ${{[0-9]+}}, {{\$|\.L}}BB define void @test_bgez(i32 %a) { entry: diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll index f74363702af5..9bc9a305a204 100644 --- a/test/CodeGen/Mips/blockaddr.ll +++ b/test/CodeGen/Mips/blockaddr.ll @@ -22,22 +22,22 @@ entry: ; STATIC-O32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]]) ; STATIC-O32: lui $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]]) ; STATIC-O32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]]) -; PIC-N32: lw $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]]) -; PIC-N32: addiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]]) -; PIC-N32: lw $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]]) -; PIC-N32: addiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]]) -; STATIC-N32: lui $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]]) -; STATIC-N32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]]) -; STATIC-N32: lui $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]]) -; STATIC-N32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]]) -; PIC-N64: ld $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]]) -; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]]) -; PIC-N64: ld $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]]) -; PIC-N64: daddiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]]) -; STATIC-N64: ld $[[R2:[0-9]+]], %got_page($tmp[[T2:[0-9]+]]) -; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst($tmp[[T2]]) -; STATIC-N64: ld $[[R3:[0-9]+]], %got_page($tmp[[T3:[0-9]+]]) -; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst($tmp[[T3]]) +; PIC-N32: lw $[[R0:[0-9]+]], %got_page(.Ltmp[[T0:[0-9]+]]) +; PIC-N32: addiu ${{[0-9]+}}, $[[R0]], %got_ofst(.Ltmp[[T0]]) +; PIC-N32: lw $[[R1:[0-9]+]], %got_page(.Ltmp[[T1:[0-9]+]]) +; PIC-N32: addiu ${{[0-9]+}}, $[[R1]], %got_ofst(.Ltmp[[T1]]) +; STATIC-N32: lui $[[R2:[0-9]+]], %hi(.Ltmp[[T2:[0-9]+]]) +; STATIC-N32: addiu ${{[0-9]+}}, $[[R2]], %lo(.Ltmp[[T2]]) +; STATIC-N32: lui $[[R3:[0-9]+]], %hi(.Ltmp[[T3:[0-9]+]]) +; STATIC-N32: addiu ${{[0-9]+}}, $[[R3]], %lo(.Ltmp[[T3]]) +; PIC-N64: ld $[[R0:[0-9]+]], %got_page(.Ltmp[[T0:[0-9]+]]) +; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst(.Ltmp[[T0]]) +; PIC-N64: ld $[[R1:[0-9]+]], %got_page(.Ltmp[[T1:[0-9]+]]) +; PIC-N64: daddiu ${{[0-9]+}}, $[[R1]], %got_ofst(.Ltmp[[T1]]) +; STATIC-N64: ld $[[R2:[0-9]+]], %got_page(.Ltmp[[T2:[0-9]+]]) +; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst(.Ltmp[[T2]]) +; STATIC-N64: ld $[[R3:[0-9]+]], %got_page(.Ltmp[[T3:[0-9]+]]) +; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst(.Ltmp[[T3]]) ; STATIC-MIPS16-1: .ent f ; STATIC-MIPS16-2: .ent f ; STATIC-MIPS16-1: li $[[R1_16:[0-9]+]], %hi($tmp[[TI_16:[0-9]+]]) diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll index d6d47678590a..9352294991aa 100644 --- a/test/CodeGen/Mips/ehframe-indirect.ll +++ b/test/CodeGen/Mips/ehframe-indirect.ll @@ -33,9 +33,15 @@ declare void @foo() ; ALL: GCC_except_table{{[0-9]+}}: ; ALL: .byte 155 # @TType Encoding = indirect pcrel sdata4 -; ALL: $[[PC_LABEL:tmp[0-9]+]]: -; ALL: .4byte ($_ZTISt9exception.DW.stub)-($[[PC_LABEL]]) -; ALL: $_ZTISt9exception.DW.stub: +; O32: [[PC_LABEL:\$tmp[0-9]+]]: +; N32: [[PC_LABEL:\.Ltmp[0-9]+]]: +; N64: [[PC_LABEL:\.Ltmp[0-9]+]]: +; O32: .4byte ($_ZTISt9exception.DW.stub)-([[PC_LABEL]]) +; N32: .4byte .L_ZTISt9exception.DW.stub-[[PC_LABEL]] +; N64: .4byte .L_ZTISt9exception.DW.stub-[[PC_LABEL]] +; O32: $_ZTISt9exception.DW.stub: +; N32: .L_ZTISt9exception.DW.stub: +; N64: .L_ZTISt9exception.DW.stub: ; O32: .4byte _ZTISt9exception ; N32: .4byte _ZTISt9exception ; N64: .8byte _ZTISt9exception diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll index 142ee1144bbe..bd04ed0211f5 100644 --- a/test/CodeGen/Mips/fcmp.ll +++ b/test/CodeGen/Mips/fcmp.ll @@ -1076,12 +1076,12 @@ entry: ; 32-CMP-DAG: bnezc $[[T4]], ; 64-C-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12 -; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)( +; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI32_0)( ; 64-C-DAG: c.ole.s $[[T0]], $[[T1]] ; 64-C-DAG: bc1t ; 64-CMP-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)( +; 64-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI32_0)( ; 64-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]] ; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] ; FIXME: This instruction is redundant. @@ -1106,8 +1106,8 @@ entry: ; MM64R6-DAG: daddu $[[T1:[0-9]+]], $[[T0]], $25 ; MM64R6-DAG: daddiu $[[T2:[0-9]+]], $[[T1]], %lo(%neg(%gp_rel(bug1_f32))) ; MM64R6-DAG: add.s $[[T3:f[0-9]+]], $f13, $f12 -; MM64R6-DAG: ld $[[T4:[0-9]+]], %got_page($CPI32_0)($[[T2]]) -; MM64R6-DAG: lwc1 $[[T5:f[0-9]+]], %got_ofst($CPI32_0)($[[T4]]) +; MM64R6-DAG: ld $[[T4:[0-9]+]], %got_page(.LCPI32_0)($[[T2]]) +; MM64R6-DAG: lwc1 $[[T5:f[0-9]+]], %got_ofst(.LCPI32_0)($[[T4]]) ; MM64R6-DAG: cmp.le.s $[[T6:f[0-9]+]], $[[T3]], $[[T5]] ; MM64R6-DAG: mfc1 $[[T7:[0-9]+]], $[[T6]] ; MM64R6-DAG: andi16 $[[T8:[0-9]+]], $[[T7]], 1 @@ -1145,12 +1145,12 @@ entry: ; 32-CMP-DAG: bnezc $[[T4]], ; 64-C-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12 -; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)( +; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI33_0)( ; 64-C-DAG: c.ole.d $[[T0]], $[[T1]] ; 64-C-DAG: bc1t ; 64-CMP-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)( +; 64-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI33_0)( ; 64-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]] ; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] ; FIXME: This instruction is redundant. @@ -1175,8 +1175,8 @@ entry: ; MM64R6-DAG: daddu $[[T1:[0-9]+]], $[[T0]], $25 ; MM64R6-DAG: daddiu $[[T2:[0-9]+]], $[[T1]], %lo(%neg(%gp_rel(bug1_f64))) ; MM64R6-DAG: add.d $[[T3:f[0-9]+]], $f13, $f12 -; MM64R6-DAG: ld $[[T4:[0-9]+]], %got_page($CPI33_0)($[[T2]]) -; MM64R6-DAG: ldc1 $[[T5:f[0-9]+]], %got_ofst($CPI33_0)($[[T4]]) +; MM64R6-DAG: ld $[[T4:[0-9]+]], %got_page(.LCPI33_0)($[[T2]]) +; MM64R6-DAG: ldc1 $[[T5:f[0-9]+]], %got_ofst(.LCPI33_0)($[[T4]]) ; MM64R6-DAG: cmp.le.d $[[T6:f[0-9]+]], $[[T3]], $[[T5]] ; MM64R6-DAG: mfc1 $[[T7:[0-9]+]], $[[T6]] ; MM64R6-DAG: andi16 $[[T8:[0-9]+]], $[[T7]], 1 diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll index bf1b045dbf28..7fb508f606b2 100644 --- a/test/CodeGen/Mips/fpbr.ll +++ b/test/CodeGen/Mips/fpbr.ll @@ -10,8 +10,9 @@ entry: ; ALL-LABEL: func0: ; 32-FCC: c.eq.s $f12, $f14 +; 32-FCC: bc1f $BB0_2 ; 64-FCC: c.eq.s $f12, $f13 -; FCC: bc1f $BB0_2 +; 64-FCC: bc1f .LBB0_2 ; 32-GPR: cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $f14 ; 64-GPR: cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $f13 @@ -19,7 +20,7 @@ entry: ; FIXME: We ought to be able to transform not+bnez -> beqz ; GPR: not $[[GPRCC]], $[[GPRCC]] ; 32-GPR: bnez $[[GPRCC]], $BB0_2 -; 64-GPR: bnezc $[[GPRCC]], $BB0_2 +; 64-GPR: bnezc $[[GPRCC]], .LBB0_2 %cmp = fcmp oeq float %f2, %f3 br i1 %cmp, label %if.then, label %if.else @@ -45,15 +46,16 @@ entry: ; ALL-LABEL: func1: ; 32-FCC: c.olt.s $f12, $f14 +; 32-FCC: bc1f $BB1_2 ; 64-FCC: c.olt.s $f12, $f13 -; FCC: bc1f $BB1_2 +; 64-FCC: bc1f .LBB1_2 ; 32-GPR: cmp.ule.s $[[FGRCC:f[0-9]+]], $f14, $f12 ; 64-GPR: cmp.ule.s $[[FGRCC:f[0-9]+]], $f13, $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] ; 32-GPR: bnez $[[GPRCC]], $BB1_2 -; 64-GPR: bnezc $[[GPRCC]], $BB1_2 +; 64-GPR: bnezc $[[GPRCC]], .LBB1_2 %cmp = fcmp olt float %f2, %f3 br i1 %cmp, label %if.then, label %if.else @@ -75,15 +77,16 @@ entry: ; ALL-LABEL: func2: ; 32-FCC: c.ole.s $f12, $f14 +; 32-FCC: bc1t $BB2_2 ; 64-FCC: c.ole.s $f12, $f13 -; FCC: bc1t $BB2_2 +; 64-FCC: bc1t .LBB2_2 ; 32-GPR: cmp.ult.s $[[FGRCC:f[0-9]+]], $f14, $f12 ; 64-GPR: cmp.ult.s $[[FGRCC:f[0-9]+]], $f13, $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] ; 32-GPR: beqz $[[GPRCC]], $BB2_2 -; 64-GPR: beqzc $[[GPRCC]], $BB2_2 +; 64-GPR: beqzc $[[GPRCC]], .LBB2_2 %cmp = fcmp ugt float %f2, %f3 br i1 %cmp, label %if.else, label %if.then @@ -105,8 +108,9 @@ entry: ; ALL-LABEL: func3: ; 32-FCC: c.eq.d $f12, $f14 +; 32-FCC: bc1f $BB3_2 ; 64-FCC: c.eq.d $f12, $f13 -; FCC: bc1f $BB3_2 +; 64-FCC: bc1f .LBB3_2 ; 32-GPR: cmp.eq.d $[[FGRCC:f[0-9]+]], $f12, $f14 ; 64-GPR: cmp.eq.d $[[FGRCC:f[0-9]+]], $f12, $f13 @@ -114,7 +118,7 @@ entry: ; FIXME: We ought to be able to transform not+bnez -> beqz ; GPR: not $[[GPRCC]], $[[GPRCC]] ; 32-GPR: bnez $[[GPRCC]], $BB3_2 -; 64-GPR: bnezc $[[GPRCC]], $BB3_2 +; 64-GPR: bnezc $[[GPRCC]], .LBB3_2 %cmp = fcmp oeq double %f2, %f3 br i1 %cmp, label %if.then, label %if.else @@ -136,15 +140,16 @@ entry: ; ALL-LABEL: func4: ; 32-FCC: c.olt.d $f12, $f14 +; 32-FCC: bc1f $BB4_2 ; 64-FCC: c.olt.d $f12, $f13 -; FCC: bc1f $BB4_2 +; 64-FCC: bc1f .LBB4_2 ; 32-GPR: cmp.ule.d $[[FGRCC:f[0-9]+]], $f14, $f12 ; 64-GPR: cmp.ule.d $[[FGRCC:f[0-9]+]], $f13, $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] ; 32-GPR: bnez $[[GPRCC]], $BB4_2 -; 64-GPR: bnezc $[[GPRCC]], $BB4_2 +; 64-GPR: bnezc $[[GPRCC]], .LBB4_2 %cmp = fcmp olt double %f2, %f3 br i1 %cmp, label %if.then, label %if.else @@ -166,15 +171,16 @@ entry: ; ALL-LABEL: func5: ; 32-FCC: c.ole.d $f12, $f14 +; 32-FCC: bc1t $BB5_2 ; 64-FCC: c.ole.d $f12, $f13 -; FCC: bc1t $BB5_2 +; 64-FCC: bc1t .LBB5_2 ; 32-GPR: cmp.ult.d $[[FGRCC:f[0-9]+]], $f14, $f12 ; 64-GPR: cmp.ult.d $[[FGRCC:f[0-9]+]], $f13, $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] ; 32-GPR: beqz $[[GPRCC]], $BB5_2 -; 64-GPR: beqzc $[[GPRCC]], $BB5_2 +; 64-GPR: beqzc $[[GPRCC]], .LBB5_2 %cmp = fcmp ugt double %f2, %f3 br i1 %cmp, label %if.else, label %if.then diff --git a/test/CodeGen/Mips/jumptable_labels.ll b/test/CodeGen/Mips/jumptable_labels.ll new file mode 100644 index 000000000000..8c7edc10689f --- /dev/null +++ b/test/CodeGen/Mips/jumptable_labels.ll @@ -0,0 +1,75 @@ +; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=O32 +; RUN: llc -march=mips64 -target-abi=n32 < %s | FileCheck %s -check-prefix=N32 +; RUN: llc -march=mips64 < %s | FileCheck %s -check-prefix=N64 + +; We only use the '$' prefix on O32. The others use the ELF convention. +; O32: $JTI0_0 +; N32: .LJTI0_0 +; N64: .LJTI0_0 + +; Check basic block labels while we're at it. +; O32: $BB0_2: +; N32: .LBB0_2: +; N64: .LBB0_2: + +@.str = private unnamed_addr constant [2 x i8] c"A\00", align 1 +@.str.1 = private unnamed_addr constant [2 x i8] c"B\00", align 1 +@.str.2 = private unnamed_addr constant [2 x i8] c"C\00", align 1 +@.str.3 = private unnamed_addr constant [2 x i8] c"D\00", align 1 +@.str.4 = private unnamed_addr constant [2 x i8] c"E\00", align 1 +@.str.5 = private unnamed_addr constant [2 x i8] c"F\00", align 1 +@.str.6 = private unnamed_addr constant [2 x i8] c"G\00", align 1 +@.str.7 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 + +define i8* @_Z3fooi(i32 signext %Letter) { +entry: + %retval = alloca i8*, align 8 + %Letter.addr = alloca i32, align 4 + store i32 %Letter, i32* %Letter.addr, align 4 + %0 = load i32, i32* %Letter.addr, align 4 + switch i32 %0, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + i32 4, label %sw.bb4 + i32 5, label %sw.bb5 + i32 6, label %sw.bb6 + ] + +sw.bb: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb1: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb2: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb3: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb4: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb5: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.5, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb6: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.6, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.epilog: + store i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.7, i32 0, i32 0), i8** %retval, align 8 + br label %return + +return: + %1 = load i8*, i8** %retval, align 8 + ret i8* %1 +} diff --git a/test/CodeGen/Mips/llvm-ir/ashr.ll b/test/CodeGen/Mips/llvm-ir/ashr.ll index af9b81f9203f..cfb9855e6438 100644 --- a/test/CodeGen/Mips/llvm-ir/ashr.ll +++ b/test/CodeGen/Mips/llvm-ir/ashr.ll @@ -167,18 +167,18 @@ entry: ; M3: sll $[[T0:[0-9]+]], $7, 0 ; M3: dsrav $[[T1:[0-9]+]], $4, $7 ; M3: andi $[[T2:[0-9]+]], $[[T0]], 64 - ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]] + ; M3: bnez $[[T3:[0-9]+]], [[BB0:.LBB[0-9_]+]] ; M3: move $3, $[[T1]] ; M3: dsrlv $[[T4:[0-9]+]], $5, $7 ; M3: dsll $[[T5:[0-9]+]], $4, 1 ; M3: not $[[T6:[0-9]+]], $[[T0]] ; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]] ; M3: or $3, $[[T7]], $[[T4]] - ; M3: $[[BB0]]: - ; M3: beqz $[[T3]], $[[BB1:BB[0-9_]+]] + ; M3: [[BB0]]: + ; M3: beqz $[[T3]], [[BB1:.LBB[0-9_]+]] ; M3: nop ; M3: dsra $2, $4, 63 - ; M3: $[[BB1]]: + ; M3: [[BB1]]: ; M3: jr $ra ; M3: nop diff --git a/test/CodeGen/Mips/llvm-ir/indirectbr.ll b/test/CodeGen/Mips/llvm-ir/indirectbr.ll index d982b570d7c2..8fed32aee9be 100644 --- a/test/CodeGen/Mips/llvm-ir/indirectbr.ll +++ b/test/CodeGen/Mips/llvm-ir/indirectbr.ll @@ -18,13 +18,13 @@ define i32 @br(i8 *%addr) { ; R6C: jrc $4 # <MCInst #{{[0-9]+}} JIC -; ALL: $BB0_1: # %L1 +; ALL: {{\$|\.L}}BB0_1: # %L1 ; NOT-R6: jr $ra # <MCInst #{{[0-9]+}} JR ; R6: jr $ra # <MCInst #{{[0-9]+}} JALR ; R6C: jr $ra # <MCInst #{{[0-9]+}} JALR ; ALL: addiu $2, $zero, 0 -; ALL: $BB0_2: # %L2 +; ALL: {{\$|\.L}}BB0_2: # %L2 ; NOT-R6: jr $ra # <MCInst #{{[0-9]+}} JR ; R6: jr $ra # <MCInst #{{[0-9]+}} JALR ; R6C: jr $ra # <MCInst #{{[0-9]+}} JALR diff --git a/test/CodeGen/Mips/llvm-ir/lshr.ll b/test/CodeGen/Mips/llvm-ir/lshr.ll index 10748b9c803a..63fb075a4ad6 100644 --- a/test/CodeGen/Mips/llvm-ir/lshr.ll +++ b/test/CodeGen/Mips/llvm-ir/lshr.ll @@ -158,18 +158,18 @@ entry: ; M3: sll $[[T0:[0-9]+]], $7, 0 ; M3: dsrlv $[[T1:[0-9]+]], $4, $7 ; M3: andi $[[T2:[0-9]+]], $[[T0]], 64 - ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]] + ; M3: bnez $[[T3:[0-9]+]], [[BB0:\.LBB[0-9_]+]] ; M3: move $3, $[[T1]] ; M3: dsrlv $[[T4:[0-9]+]], $5, $7 ; M3: dsll $[[T5:[0-9]+]], $4, 1 ; M3: not $[[T6:[0-9]+]], $[[T0]] ; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]] ; M3: or $3, $[[T7]], $[[T4]] - ; M3: $[[BB0]]: - ; M3: bnez $[[T3]], $[[BB1:BB[0-9_]+]] + ; M3: [[BB0]]: + ; M3: bnez $[[T3]], [[BB1:\.LBB[0-9_]+]] ; M3: daddiu $2, $zero, 0 ; M3: move $2, $[[T1]] - ; M3: $[[BB1]]: + ; M3: [[BB1]]: ; M3: jr $ra ; M3: nop diff --git a/test/CodeGen/Mips/llvm-ir/select-dbl.ll b/test/CodeGen/Mips/llvm-ir/select-dbl.ll index 1ca5b4e054ba..42f02c4cbabf 100644 --- a/test/CodeGen/Mips/llvm-ir/select-dbl.ll +++ b/test/CodeGen/Mips/llvm-ir/select-dbl.ll @@ -58,10 +58,10 @@ entry: ; SEL-32: sel.d $f0, $[[F1]], $[[F0]] ; M3: andi $[[T0:[0-9]+]], $4, 1 - ; M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]] + ; M3: bnez $[[T0]], [[BB0:.LBB[0-9_]+]] ; M3: nop ; M3: mov.d $f13, $f14 - ; M3: $[[BB0]]: + ; M3: [[BB0]]: ; M3: jr $ra ; M3: mov.d $f0, $f13 @@ -106,10 +106,10 @@ entry: ; SEL-32: sel.d $f0, $f14, $f12 ; M3: andi $[[T0:[0-9]+]], $6, 1 - ; M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]] + ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] ; M3: nop ; M3: mov.d $f12, $f13 - ; M3: $[[BB0]]: + ; M3: [[BB0]]: ; M3: jr $ra ; M3: mov.d $f0, $f12 @@ -135,11 +135,12 @@ entry: ; M2: c.olt.d $f12, $f14 ; M3: c.olt.d $f12, $f13 - ; M2-M3: bc1t $[[BB0:BB[0-9_]+]] + ; M2: bc1t [[BB0:\$BB[0-9_]+]] + ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.d $f12, $f14 ; M3: mov.d $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.d $f0, $f12 @@ -172,11 +173,12 @@ entry: ; M2: c.ole.d $f12, $f14 ; M3: c.ole.d $f12, $f13 - ; M2-M3: bc1t $[[BB0:BB[0-9_]+]] + ; M2: bc1t [[BB0:\$BB[0-9_]+]] + ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.d $f12, $f14 ; M3: mov.d $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.d $f0, $f12 @@ -209,11 +211,12 @@ entry: ; M2: c.ule.d $f12, $f14 ; M3: c.ule.d $f12, $f13 - ; M2-M3: bc1f $[[BB0:BB[0-9_]+]] + ; M2: bc1f [[BB0:\$BB[0-9_]+]] + ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.d $f12, $f14 ; M3: mov.d $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.d $f0, $f12 @@ -246,11 +249,12 @@ entry: ; M2: c.ult.d $f12, $f14 ; M3: c.ult.d $f12, $f13 - ; M2-M3: bc1f $[[BB0:BB[0-9_]+]] + ; M2: bc1f [[BB0:\$BB[0-9_]+]] + ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.d $f12, $f14 ; M3: mov.d $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.d $f0, $f12 @@ -283,11 +287,12 @@ entry: ; M2: c.eq.d $f12, $f14 ; M3: c.eq.d $f12, $f13 - ; M2-M3: bc1t $[[BB0:BB[0-9_]+]] + ; M2: bc1t [[BB0:\$BB[0-9_]+]] + ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.d $f12, $f14 ; M3: mov.d $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.d $f0, $f12 @@ -320,11 +325,12 @@ entry: ; M2: c.ueq.d $f12, $f14 ; M3: c.ueq.d $f12, $f13 - ; M2-M3: bc1f $[[BB0:BB[0-9_]+]] + ; M2: bc1f [[BB0:\$BB[0-9_]+]] + ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.d $f12, $f14 ; M3: mov.d $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.d $f0, $f12 diff --git a/test/CodeGen/Mips/llvm-ir/select-flt.ll b/test/CodeGen/Mips/llvm-ir/select-flt.ll index 6a0334da4833..e632897e76cd 100644 --- a/test/CodeGen/Mips/llvm-ir/select-flt.ll +++ b/test/CodeGen/Mips/llvm-ir/select-flt.ll @@ -34,12 +34,13 @@ entry: ; ALL-LABEL: tst_select_i1_float: ; M2-M3: andi $[[T0:[0-9]+]], $4, 1 - ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]] + ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]] + ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: jr $ra ; M2: mtc1 $6, $f0 ; M3: mov.s $f13, $f14 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2: mtc1 $5, $f0 ; M3: mov.s $f0, $f13 @@ -76,11 +77,12 @@ entry: ; ALL-LABEL: tst_select_i1_float_reordered: ; M2-M3: andi $[[T0:[0-9]+]], $6, 1 - ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]] + ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]] + ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.s $f12, $f14 ; M3: mov.s $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.s $f0, $f12 @@ -112,11 +114,12 @@ entry: ; M2: c.olt.s $f12, $f14 ; M3: c.olt.s $f12, $f13 - ; M2-M3: bc1t $[[BB0:BB[0-9_]+]] + ; M2: bc1t [[BB0:\$BB[0-9_]+]] + ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.s $f12, $f14 ; M3: mov.s $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.s $f0, $f12 @@ -149,11 +152,12 @@ entry: ; M2: c.ole.s $f12, $f14 ; M3: c.ole.s $f12, $f13 - ; M2-M3: bc1t $[[BB0:BB[0-9_]+]] + ; M2: bc1t [[BB0:\$BB[0-9_]+]] + ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.s $f12, $f14 ; M3: mov.s $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.s $f0, $f12 @@ -186,11 +190,12 @@ entry: ; M2: c.ule.s $f12, $f14 ; M3: c.ule.s $f12, $f13 - ; M2-M3: bc1f $[[BB0:BB[0-9_]+]] + ; M2: bc1f [[BB0:\$BB[0-9_]+]] + ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.s $f12, $f14 ; M3: mov.s $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.s $f0, $f12 @@ -223,11 +228,12 @@ entry: ; M2: c.ult.s $f12, $f14 ; M3: c.ult.s $f12, $f13 - ; M2-M3: bc1f $[[BB0:BB[0-9_]+]] + ; M2: bc1f [[BB0:\$BB[0-9_]+]] + ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.s $f12, $f14 ; M3: mov.s $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.s $f0, $f12 @@ -260,11 +266,12 @@ entry: ; M2: c.eq.s $f12, $f14 ; M3: c.eq.s $f12, $f13 - ; M2-M3: bc1t $[[BB0:BB[0-9_]+]] + ; M2: bc1t [[BB0:\$BB[0-9_]+]] + ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.s $f12, $f14 ; M3: mov.s $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.s $f0, $f12 @@ -297,11 +304,12 @@ entry: ; M2: c.ueq.s $f12, $f14 ; M3: c.ueq.s $f12, $f13 - ; M2-M3: bc1f $[[BB0:BB[0-9_]+]] + ; M2: bc1f [[BB0:\$BB[0-9_]+]] + ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2: mov.s $f12, $f14 ; M3: mov.s $f12, $f13 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: mov.s $f0, $f12 diff --git a/test/CodeGen/Mips/llvm-ir/select-int.ll b/test/CodeGen/Mips/llvm-ir/select-int.ll index e8f78ffdcb6a..5bee3f1dbd52 100644 --- a/test/CodeGen/Mips/llvm-ir/select-int.ll +++ b/test/CodeGen/Mips/llvm-ir/select-int.ll @@ -35,10 +35,11 @@ entry: ; ALL-LABEL: tst_select_i1_i1: ; M2-M3: andi $[[T0:[0-9]+]], $4, 1 - ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]] + ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]] + ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2-M3: move $5, $6 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: move $2, $5 @@ -70,10 +71,11 @@ entry: ; ALL-LABEL: tst_select_i1_i8: ; M2-M3: andi $[[T0:[0-9]+]], $4, 1 - ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]] + ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]] + ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2-M3: move $5, $6 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: move $2, $5 @@ -105,10 +107,11 @@ entry: ; ALL-LABEL: tst_select_i1_i32: ; M2-M3: andi $[[T0:[0-9]+]], $4, 1 - ; M2-M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]] + ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]] + ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop ; M2-M3: move $5, $6 - ; M2-M3: $[[BB0]]: + ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2-M3: move $2, $5 @@ -170,10 +173,10 @@ entry: ; SEL-32: or $3, $[[T4]], $[[T6]] ; M3: andi $[[T0:[0-9]+]], $4, 1 - ; M3: bnez $[[T0]], $[[BB0:BB[0-9_]+]] + ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] ; M3: nop ; M3: move $5, $6 - ; M3: $[[BB0]]: + ; M3: [[BB0]]: ; M3: jr $ra ; M3: move $2, $5 @@ -214,19 +217,19 @@ define i8* @tst_select_word_cst(i8* %a, i8* %b) { ; M2: addiu $[[T0:[0-9]+]], $zero, -1 ; M2: xor $[[T1:[0-9]+]], $5, $[[T0]] ; M2: sltu $[[T2:[0-9]+]], $zero, $[[T1]] - ; M2: bnez $[[T2]], $[[BB0:BB[0-9_]+]] + ; M2: bnez $[[T2]], [[BB0:\$BB[0-9_]+]] ; M2: addiu $2, $zero, 0 ; M2: move $2, $4 - ; M2: $[[BB0]]: + ; M2: [[BB0]]: ; M2: jr $ra ; M3: daddiu $[[T0:[0-9]+]], $zero, -1 ; M3: xor $[[T1:[0-9]+]], $5, $[[T0]] ; M3: sltu $[[T2:[0-9]+]], $zero, $[[T1]] - ; M3: bnez $[[T2]], $[[BB0:BB[0-9_]+]] + ; M3: bnez $[[T2]], [[BB0:\.LBB[0-9_]+]] ; M3: daddiu $2, $zero, 0 ; M3: move $2, $4 - ; M3: $[[BB0]]: + ; M3: [[BB0]]: ; M3: jr $ra ; CMOV-32: addiu $[[T0:[0-9]+]], $zero, -1 diff --git a/test/CodeGen/Mips/llvm-ir/shl.ll b/test/CodeGen/Mips/llvm-ir/shl.ll index fa43840a8b7b..74b6155032a3 100644 --- a/test/CodeGen/Mips/llvm-ir/shl.ll +++ b/test/CodeGen/Mips/llvm-ir/shl.ll @@ -174,18 +174,18 @@ entry: ; M3: sll $[[T0:[0-9]+]], $7, 0 ; M3: dsllv $[[T1:[0-9]+]], $5, $7 ; M3: andi $[[T2:[0-9]+]], $[[T0]], 64 - ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]] + ; M3: bnez $[[T3:[0-9]+]], [[BB0:\.LBB[0-9_]+]] ; M3: move $2, $[[T1]] ; M3: dsllv $[[T4:[0-9]+]], $4, $7 ; M3: dsrl $[[T5:[0-9]+]], $5, 1 ; M3: not $[[T6:[0-9]+]], $[[T0]] ; M3: dsrlv $[[T7:[0-9]+]], $[[T5]], $[[T6]] ; M3: or $2, $[[T4]], $[[T7]] - ; M3: $[[BB0]]: - ; M3: bnez $[[T3]], $[[BB1:BB[0-9_]+]] + ; M3: [[BB0]]: + ; M3: bnez $[[T3]], [[BB1:\.LBB[0-9_]+]] ; M3: daddiu $3, $zero, 0 ; M3: move $3, $[[T1]] - ; M3: $[[BB1]]: + ; M3: [[BB1]]: ; M3: jr $ra ; M3: nop diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll index 06eda11e7888..59e284165d40 100644 --- a/test/CodeGen/Mips/longbranch.ll +++ b/test/CodeGen/Mips/longbranch.ll @@ -84,28 +84,28 @@ end: ; Check the MIPS64 version. ; N64: lui $[[R0:[0-9]+]], %hi(%neg(%gp_rel(test1))) -; N64: bnez $4, $[[BB0:BB[0-9_]+]] +; N64: bnez $4, [[BB0:\.LBB[0-9_]+]] ; N64: daddu $[[R1:[0-9]+]], $[[R0]], $25 ; Check for long branch expansion: ; N64: daddiu $sp, $sp, -16 ; N64-NEXT: sd $ra, 0($sp) -; N64-NEXT: daddiu $1, $zero, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]])) +; N64-NEXT: daddiu $1, $zero, %hi([[BB2:\.LBB[0-9_]+]]-[[BB1:\.LBB[0-9_]+]]) ; N64-NEXT: dsll $1, $1, 16 -; N64-NEXT: bal $[[BB1]] -; N64-NEXT: daddiu $1, $1, %lo(($[[BB2]])-($[[BB1]])) -; N64-NEXT: $[[BB1]]: +; N64-NEXT: bal [[BB1]] +; N64-NEXT: daddiu $1, $1, %lo([[BB2]]-[[BB1]]) +; N64-NEXT: [[BB1]]: ; N64-NEXT: daddu $1, $ra, $1 ; N64-NEXT: ld $ra, 0($sp) ; N64-NEXT: jr $1 ; N64-NEXT: daddiu $sp, $sp, 16 -; N64: $[[BB0]]: +; N64: [[BB0]]: ; N64: daddiu $[[GP:[0-9]+]], $[[R1]], %lo(%neg(%gp_rel(test1))) ; N64: ld $[[R2:[0-9]+]], %got_disp(x)($[[GP]]) ; N64: addiu $[[R3:[0-9]+]], $zero, 1 ; N64: sw $[[R3]], 0($[[R2]]) -; N64: $[[BB2]]: +; N64: [[BB2]]: ; N64: jr $ra ; N64: nop diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll index 5d253d7af253..d7a05800a273 100644 --- a/test/CodeGen/Mips/msa/basic_operations.ll +++ b/test/CodeGen/Mips/msa/basic_operations.ll @@ -36,14 +36,14 @@ define void @const_v16i8() nounwind { store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 31>, <16 x i8>*@v16i8 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6>, <16 x i8>*@v16i8 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0>, <16 x i8>*@v16i8 @@ -59,8 +59,8 @@ define void @const_v16i8() nounwind { store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>*@v16i8 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]]) ret void @@ -77,8 +77,8 @@ define void @const_v8i16() nounwind { store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 2, i16 1, i16 1, i16 1, i16 31>, <8 x i16>*@v8i16 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>, <8 x i16>*@v8i16 @@ -93,8 +93,8 @@ define void @const_v8i16() nounwind { store volatile <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>, <8 x i16>*@v8i16 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]]) ret void @@ -111,8 +111,8 @@ define void @const_v4i32() nounwind { store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 31>, <4 x i32>*@v4i32 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>, <4 x i32>*@v4i32 @@ -123,14 +123,14 @@ define void @const_v4i32() nounwind { store volatile <4 x i32> <i32 1, i32 2, i32 1, i32 2>, <4 x i32>*@v4i32 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <4 x i32> <i32 3, i32 4, i32 5, i32 6>, <4 x i32>*@v4i32 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) ret void @@ -156,15 +156,15 @@ define void @const_v2i64() nounwind { store volatile <2 x i64> <i64 1, i64 31>, <2 x i64>*@v2i64 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) ; MIPS64: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <2 x i64> <i64 3, i64 4>, <2 x i64>*@v2i64 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) ; MIPS64: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll index d714b3eec1f2..15468781f308 100644 --- a/test/CodeGen/Mips/msa/basic_operations_float.ll +++ b/test/CodeGen/Mips/msa/basic_operations_float.ll @@ -23,8 +23,8 @@ define void @const_v4f32() nounwind { store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 31.0>, <4 x float>*@v4f32 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <4 x float> <float 65537.0, float 65537.0, float 65537.0, float 65537.0>, <4 x float>*@v4f32 @@ -34,14 +34,14 @@ define void @const_v4f32() nounwind { store volatile <4 x float> <float 1.0, float 2.0, float 1.0, float 2.0>, <4 x float>*@v4f32 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <4 x float> <float 3.0, float 4.0, float 5.0, float 6.0>, <4 x float>*@v4f32 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) ret void @@ -55,38 +55,38 @@ define void @const_v2f64() nounwind { store volatile <2 x double> <double 72340172838076673.0, double 72340172838076673.0>, <2 x double>*@v2f64 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <2 x double> <double 281479271743489.0, double 281479271743489.0>, <2 x double>*@v2f64 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <2 x double> <double 4294967297.0, double 4294967297.0>, <2 x double>*@v2f64 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <2 x double> <double 1.0, double 1.0>, <2 x double>*@v2f64 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <2 x double> <double 1.0, double 31.0>, <2 x double>*@v2f64 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) store volatile <2 x double> <double 3.0, double 4.0>, <2 x double>*@v2f64 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($ + ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L + ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) ret void diff --git a/test/CodeGen/Mips/octeon.ll b/test/CodeGen/Mips/octeon.ll index b441274cec6b..7e2a81019ac4 100644 --- a/test/CodeGen/Mips/octeon.ll +++ b/test/CodeGen/Mips/octeon.ll @@ -91,9 +91,9 @@ entry: define i64 @bbit0(i64 %a) nounwind { entry: ; ALL-LABEL: bbit0: -; OCTEON: bbit0 $4, 3, $[[BB0:BB[0-9_]+]] +; OCTEON: bbit0 $4, 3, [[BB0:(\$|\.L)BB[0-9_]+]] ; MIPS64: andi $[[T0:[0-9]+]], $4, 8 -; MIPS64: bnez $[[T0]], $[[BB0:BB[0-9_]+]] +; MIPS64: bnez $[[T0]], [[BB0:(\$|\.L)BB[0-9_]+]] %bit = and i64 %a, 8 %res = icmp eq i64 %bit, 0 br i1 %res, label %endif, label %if @@ -107,11 +107,11 @@ endif: define i64 @bbit032(i64 %a) nounwind { entry: ; ALL-LABEL: bbit032: -; OCTEON: bbit032 $4, 3, $[[BB0:BB[0-9_]+]] +; OCTEON: bbit032 $4, 3, [[BB0:(\$|\.L)BB[0-9_]+]] ; MIPS64: daddiu $[[T0:[0-9]+]], $zero, 1 ; MIPS64: dsll $[[T1:[0-9]+]], $[[T0]], 35 ; MIPS64: and $[[T2:[0-9]+]], $4, $[[T1]] -; MIPS64: bnez $[[T2]], $[[BB0:BB[0-9_]+]] +; MIPS64: bnez $[[T2]], [[BB0:(\$|\.L)BB[0-9_]+]] %bit = and i64 %a, 34359738368 %res = icmp eq i64 %bit, 0 br i1 %res, label %endif, label %if @@ -125,9 +125,9 @@ endif: define i64 @bbit1(i64 %a) nounwind { entry: ; ALL-LABEL: bbit1: -; OCTEON: bbit1 $4, 3, $[[BB0:BB[0-9_]+]] +; OCTEON: bbit1 $4, 3, [[BB0:(\$|\.L)BB[0-9_]+]] ; MIPS64: andi $[[T0:[0-9]+]], $4, 8 -; MIPS64: beqz $[[T0]], $[[BB0:BB[0-9_]+]] +; MIPS64: beqz $[[T0]], [[BB0:(\$|\.L)BB[0-9_]+]] %bit = and i64 %a, 8 %res = icmp ne i64 %bit, 0 br i1 %res, label %endif, label %if @@ -141,11 +141,11 @@ endif: define i64 @bbit132(i64 %a) nounwind { entry: ; ALL-LABEL: bbit132: -; OCTEON: bbit132 $4, 3, $[[BB0:BB[0-9_]+]] +; OCTEON: bbit132 $4, 3, [[BB0:(\$|\.L)BB[0-9_]+]] ; MIPS64: daddiu $[[T0:[0-9]+]], $zero, 1 ; MIPS64: dsll $[[T1:[0-9]+]], $[[T0]], 35 ; MIPS64: and $[[T2:[0-9]+]], $4, $[[T1]] -; MIPS64: beqz $[[T2]], $[[BB0:BB[0-9_]+]] +; MIPS64: beqz $[[T2]], [[BB0:(\$|\.L)BB[0-9_]+]] %bit = and i64 %a, 34359738368 %res = icmp ne i64 %bit, 0 br i1 %res, label %endif, label %if diff --git a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll index c7cf857e1d44..f886e1ff814d 100644 --- a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -681,10 +681,11 @@ define <2 x i64> @test_mm256_cvttpd_epi32(<4 x double> %a0) nounwind { ; X64-NEXT: vcvttpd2dqy %ymm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq - %cvt = fptosi <4 x double> %a0 to <4 x i32> + %cvt = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) %res = bitcast <4 x i32> %cvt to <2 x i64> ret <2 x i64> %res } +declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone define <4 x i64> @test_mm256_cvttps_epi32(<8 x float> %a0) nounwind { ; X32-LABEL: test_mm256_cvttps_epi32: @@ -696,10 +697,11 @@ define <4 x i64> @test_mm256_cvttps_epi32(<8 x float> %a0) nounwind { ; X64: # BB#0: ; X64-NEXT: vcvttps2dq %ymm0, %ymm0 ; X64-NEXT: retq - %cvt = fptosi <8 x float> %a0 to <8 x i32> + %cvt = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) %res = bitcast <8 x i32> %cvt to <4 x i64> ret <4 x i64> %res } +declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone define <4 x double> @test_mm256_div_pd(<4 x double> %a0, <4 x double> %a1) nounwind { ; X32-LABEL: test_mm256_div_pd: diff --git a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll index a7b4c6b285d8..0630fd8a93ca 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll @@ -359,35 +359,12 @@ define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) { declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone -define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) { -; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retl - %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone - - -define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) { -; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 -; CHECK-NEXT: retl - %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] - ret <8 x i32> %res -} -declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone - - define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { ; add operation forces the execution domain. ; CHECK-LABEL: test_x86_sse2_storeu_dq: ; CHECK: ## BB#0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: vpaddb LCPI34_0, %xmm0, %xmm0 +; CHECK-NEXT: vpaddb LCPI32_0, %xmm0, %xmm0 ; CHECK-NEXT: vmovdqu %xmm0, (%eax) ; CHECK-NEXT: retl %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index 35763297d816..c5d60da8f900 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -3431,6 +3431,39 @@ define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone +define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) { +; AVX-LABEL: test_x86_avx_cvtt_pd2dq_256: +; AVX: ## BB#0: +; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retl +; +; AVX512VL-LABEL: test_x86_avx_cvtt_pd2dq_256: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vcvttpd2dqy %ymm0, %xmm0 +; AVX512VL-NEXT: retl + %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) { +; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256: +; AVX: ## BB#0: +; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 +; AVX-NEXT: retl +; +; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 +; AVX512VL-NEXT: retl + %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone + + define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) { ; AVX-LABEL: test_x86_avx_dp_ps_256: ; AVX: ## BB#0: @@ -4552,7 +4585,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind { ; AVX-LABEL: movnt_dq: ; AVX: ## BB#0: ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX-NEXT: vpaddq LCPI254_0, %xmm0, %xmm0 +; AVX-NEXT: vpaddq LCPI256_0, %xmm0, %xmm0 ; AVX-NEXT: vmovntdq %ymm0, (%eax) ; AVX-NEXT: vzeroupper ; AVX-NEXT: retl @@ -4560,7 +4593,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind { ; AVX512VL-LABEL: movnt_dq: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512VL-NEXT: vpaddq LCPI254_0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq LCPI256_0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovntdq %ymm0, (%eax) ; AVX512VL-NEXT: retl %a2 = add <2 x i64> %a1, <i64 1, i64 1> diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 914f859927be..d2410e4a0a5d 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -744,6 +744,36 @@ define <8 x double> @sitofp_8i8_double(<8 x i8> %a) { ret <8 x double> %1 } +define <16 x double> @sitofp_16i1_double(<16 x double> %a) { +; KNL-LABEL: sitofp_16i1_double: +; KNL: ## BB#0: +; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; KNL-NEXT: vcmpltpd %zmm1, %zmm2, %k1 +; KNL-NEXT: vcmpltpd %zmm0, %zmm2, %k2 +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k2} {z} +; KNL-NEXT: vpmovqd %zmm0, %ymm0 +; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0 +; KNL-NEXT: vmovdqa64 %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: vpmovqd %zmm1, %ymm1 +; KNL-NEXT: vcvtdq2pd %ymm1, %zmm1 +; KNL-NEXT: retq +; +; SKX-LABEL: sitofp_16i1_double: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k0 +; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1 +; SKX-NEXT: vpmovm2d %k1, %ymm0 +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 +; SKX-NEXT: vpmovm2d %k0, %ymm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 +; SKX-NEXT: retq + %cmpres = fcmp ogt <16 x double> %a, zeroinitializer + %1 = sitofp <16 x i1> %cmpres to <16 x double> + ret <16 x double> %1 +} + define <8 x double> @sitofp_8i1_double(<8 x double> %a) { ; KNL-LABEL: sitofp_8i1_double: ; KNL: ## BB#0: @@ -767,6 +797,130 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) { ret <8 x double> %1 } +define <8 x float> @sitofp_8i1_float(<8 x float> %a) { +; KNL-LABEL: sitofp_8i1_float: +; KNL: ## BB#0: +; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; KNL-NEXT: vxorps %ymm1, %ymm1, %ymm1 +; KNL-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpmovqd %zmm0, %ymm0 +; KNL-NEXT: vcvtdq2ps %ymm0, %ymm0 +; KNL-NEXT: retq +; +; SKX-LABEL: sitofp_8i1_float: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k0 +; SKX-NEXT: vpmovm2d %k0, %ymm0 +; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 +; SKX-NEXT: retq + %cmpres = fcmp ogt <8 x float> %a, zeroinitializer + %1 = sitofp <8 x i1> %cmpres to <8 x float> + ret <8 x float> %1 +} + +define <4 x float> @sitofp_4i1_float(<4 x float> %a) { +; KNL-LABEL: sitofp_4i1_float: +; KNL: ## BB#0: +; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: sitofp_4i1_float: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; SKX-NEXT: retq + %cmpres = fcmp ogt <4 x float> %a, zeroinitializer + %1 = sitofp <4 x i1> %cmpres to <4 x float> + ret <4 x float> %1 +} + +define <4 x double> @sitofp_4i1_double(<4 x double> %a) { +; KNL-LABEL: sitofp_4i1_double: +; KNL: ## BB#0: +; KNL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 +; KNL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vpmovqd %zmm0, %ymm0 +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 +; KNL-NEXT: vcvtdq2pd %xmm0, %ymm0 +; KNL-NEXT: retq +; +; SKX-LABEL: sitofp_4i1_double: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; SKX-NEXT: retq + %cmpres = fcmp ogt <4 x double> %a, zeroinitializer + %1 = sitofp <4 x i1> %cmpres to <4 x double> + ret <4 x double> %1 +} + +define <2 x float> @sitofp_2i1_float(<2 x float> %a) { +; KNL-LABEL: sitofp_2i1_float: +; KNL: ## BB#0: +; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; KNL-NEXT: vpsllq $32, %xmm0, %xmm0 +; KNL-NEXT: vpsrad $31, %xmm0, %xmm1 +; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; KNL-NEXT: vpextrq $1, %xmm0, %rax +; KNL-NEXT: xorl %ecx, %ecx +; KNL-NEXT: testb $1, %al +; KNL-NEXT: movl $-1, %eax +; KNL-NEXT: movl $0, %edx +; KNL-NEXT: cmovnel %eax, %edx +; KNL-NEXT: vcvtsi2ssl %edx, %xmm0, %xmm1 +; KNL-NEXT: vmovq %xmm0, %rdx +; KNL-NEXT: testb $1, %dl +; KNL-NEXT: cmovnel %eax, %ecx +; KNL-NEXT: vcvtsi2ssl %ecx, %xmm0, %xmm0 +; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; KNL-NEXT: retq +; +; SKX-LABEL: sitofp_2i1_float: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; SKX-NEXT: retq + %cmpres = fcmp ogt <2 x float> %a, zeroinitializer + %1 = sitofp <2 x i1> %cmpres to <2 x float> + ret <2 x float> %1 +} + +define <2 x double> @sitofp_2i1_double(<2 x double> %a) { +; KNL-LABEL: sitofp_2i1_double: +; KNL: ## BB#0: +; KNL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: sitofp_2i1_double: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0 +; SKX-NEXT: vpmovm2q %k0, %xmm0 +; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 +; SKX-NEXT: retq + %cmpres = fcmp ogt <2 x double> %a, zeroinitializer + %1 = sitofp <2 x i1> %cmpres to <2 x double> + ret <2 x double> %1 +} + define <16 x float> @uitofp_16i8(<16 x i8>%a) { ; ALL-LABEL: uitofp_16i8: ; ALL: ## BB#0: @@ -787,3 +941,196 @@ define <16 x float> @uitofp_16i16(<16 x i16>%a) { ret <16 x float>%b } +define <16 x float> @uitofp_16i1_float(<16 x i32> %a) { +; ALL-LABEL: uitofp_16i1_float: +; ALL: ## BB#0: +; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1 +; ALL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0 +; ALL-NEXT: retq + %mask = icmp slt <16 x i32> %a, zeroinitializer + %1 = uitofp <16 x i1> %mask to <16 x float> + ret <16 x float> %1 +} + +define <16 x double> @uitofp_16i1_double(<16 x i32> %a) { +; KNL-LABEL: uitofp_16i1_double: +; KNL: ## BB#0: +; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1 +; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; KNL-NEXT: movq {{.*}}(%rip), %rax +; KNL-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z} +; KNL-NEXT: vpmovqd %zmm0, %ymm0 +; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 +; KNL-NEXT: kshiftrw $8, %k1, %k1 +; KNL-NEXT: vpbroadcastq %rax, %zmm1 {%k1} {z} +; KNL-NEXT: vpmovqd %zmm1, %ymm1 +; KNL-NEXT: vcvtudq2pd %ymm1, %zmm1 +; KNL-NEXT: retq +; +; SKX-LABEL: uitofp_16i1_double: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1 +; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; SKX-NEXT: movl {{.*}}(%rip), %eax +; SKX-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} +; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 +; SKX-NEXT: kshiftrw $8, %k1, %k1 +; SKX-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} +; SKX-NEXT: vcvtudq2pd %ymm1, %zmm1 +; SKX-NEXT: retq + %mask = icmp slt <16 x i32> %a, zeroinitializer + %1 = uitofp <16 x i1> %mask to <16 x double> + ret <16 x double> %1 +} + +define <8 x float> @uitofp_8i1_float(<8 x i32> %a) { +; KNL-LABEL: uitofp_8i1_float: +; KNL: ## BB#0: +; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1 +; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; KNL-NEXT: vpmovqd %zmm0, %ymm0 +; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0 +; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; KNL-NEXT: retq +; +; SKX-LABEL: uitofp_8i1_float: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 +; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} +; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 +; SKX-NEXT: retq + %mask = icmp slt <8 x i32> %a, zeroinitializer + %1 = uitofp <8 x i1> %mask to <8 x float> + ret <8 x float> %1 +} + +define <8 x double> @uitofp_8i1_double(<8 x i32> %a) { +; KNL-LABEL: uitofp_8i1_double: +; KNL: ## BB#0: +; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1 +; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; KNL-NEXT: vpmovqd %zmm0, %ymm0 +; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: uitofp_8i1_double: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 +; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} +; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 +; SKX-NEXT: retq + %mask = icmp slt <8 x i32> %a, zeroinitializer + %1 = uitofp <8 x i1> %mask to <8 x double> + ret <8 x double> %1 +} + +define <4 x float> @uitofp_4i1_float(<4 x i32> %a) { +; KNL-LABEL: uitofp_4i1_float: +; KNL: ## BB#0: +; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vpsrld $31, %xmm0, %xmm0 +; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0 +; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; KNL-NEXT: retq +; +; SKX-LABEL: uitofp_4i1_float: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 +; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} +; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 +; SKX-NEXT: retq + %mask = icmp slt <4 x i32> %a, zeroinitializer + %1 = uitofp <4 x i1> %mask to <4 x float> + ret <4 x float> %1 +} + +define <4 x double> @uitofp_4i1_double(<4 x i32> %a) { +; KNL-LABEL: uitofp_4i1_double: +; KNL: ## BB#0: +; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vpsrld $31, %xmm0, %xmm0 +; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 +; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; KNL-NEXT: retq +; +; SKX-LABEL: uitofp_4i1_double: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 +; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} +; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0 +; SKX-NEXT: retq + %mask = icmp slt <4 x i32> %a, zeroinitializer + %1 = uitofp <4 x i1> %mask to <4 x double> + ret <4 x double> %1 +} + +define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { +; KNL-LABEL: uitofp_2i1_float: +; KNL: ## BB#0: +; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vpextrq $1, %xmm0, %rax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm1 +; KNL-NEXT: vmovq %xmm0, %rax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0 +; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; KNL-NEXT: retq +; +; SKX-LABEL: uitofp_2i1_float: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 +; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} +; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 +; SKX-NEXT: retq + %mask = icmp ult <2 x i32> %a, zeroinitializer + %1 = uitofp <2 x i1> %mask to <2 x float> + ret <2 x float> %1 +} + +define <2 x double> @uitofp_2i1_double(<2 x i32> %a) { +; KNL-LABEL: uitofp_2i1_double: +; KNL: ## BB#0: +; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0 +; KNL-NEXT: vpextrq $1, %xmm0, %rax +; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1 +; KNL-NEXT: vmovq %xmm0, %rax +; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0 +; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; KNL-NEXT: retq +; +; SKX-LABEL: uitofp_2i1_double: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 +; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} +; SKX-NEXT: vcvtuqq2pd %xmm0, %xmm0 +; SKX-NEXT: retq + %mask = icmp ult <2 x i32> %a, zeroinitializer + %1 = uitofp <2 x i1> %mask to <2 x double> + ret <2 x double> %1 +} diff --git a/test/CodeGen/X86/pr28504.ll b/test/CodeGen/X86/pr28504.ll new file mode 100644 index 000000000000..a617c8aa4f1e --- /dev/null +++ b/test/CodeGen/X86/pr28504.ll @@ -0,0 +1,37 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +; The test case is rather involved, because we need to get to a state where +; We have a sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) combine, +; BUT this combine is only triggered post-legalization, so the setcc's return +; type is i8. So we can't have the combine opportunity be exposed too early. +; Basically, what we want to see is that the compare result zero-extended, and +; then stored. Only one zext, and no sexts. + +; CHECK-LABEL: main: +; CHECK: movzbl (%rdi), %[[EAX:.*]] +; CHECK-NEXT: xorl %e[[C:.]]x, %e[[C]]x +; CHECK-NEXT: cmpl $1, %[[EAX]] +; CHECK-NEXT: sete %[[C]]l +; CHECK-NEXT: movl %e[[C]]x, (%rsi) +define void @main(i8* %p, i32* %q) { +bb: + %tmp4 = load i8, i8* %p, align 1 + %tmp5 = sext i8 %tmp4 to i32 + %tmp6 = load i8, i8* %p, align 1 + %tmp7 = zext i8 %tmp6 to i32 + %tmp8 = sub nsw i32 %tmp5, %tmp7 + %tmp11 = icmp eq i32 %tmp7, 1 + %tmp12 = zext i1 %tmp11 to i32 + %tmp13 = add nsw i32 %tmp8, %tmp12 + %tmp14 = trunc i32 %tmp13 to i8 + %tmp15 = sext i8 %tmp14 to i16 + %tmp16 = sext i16 %tmp15 to i32 + store i32 %tmp16, i32* %q, align 4 + br i1 %tmp11, label %bb21, label %bb22 + +bb21: ; preds = %bb + unreachable + +bb22: ; preds = %bb + ret void +} diff --git a/test/CodeGen/X86/pr28824.ll b/test/CodeGen/X86/pr28824.ll new file mode 100644 index 000000000000..ced1f00dd01b --- /dev/null +++ b/test/CodeGen/X86/pr28824.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s + +@d = global i32 0, align 4 + +; Verify the sar happens before ecx is clobbered with the parameter being +; passed to fn3 +; CHECK-LABEL: fn4 +; CHECK: movb d, %cl +; CHECK: sarl %cl +; CHECK: movl $2, %ecx +define i32 @fn4(i32 %i) #0 { +entry: + %0 = load i32, i32* @d, align 4 + %shr = ashr i32 %i, %0 + tail call fastcc void @fn3(i32 2, i32 5, i32 %shr, i32 %i) + %cmp = icmp slt i32 %shr, 1 + %. = zext i1 %cmp to i32 + ret i32 %. +} + +declare void @fn3(i32 %p1, i32 %p2, i32 %p3, i32 %p4) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel-x86_64.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel-x86_64.ll index 2102b4211153..aad00e71dda0 100644 --- a/test/CodeGen/X86/sse-intrinsics-fast-isel-x86_64.ll +++ b/test/CodeGen/X86/sse-intrinsics-fast-isel-x86_64.ll @@ -6,13 +6,12 @@ define <4 x float> @test_mm_cvtsi64_ss(<4 x float> %a0, i64 %a1) nounwind { ; X64-LABEL: test_mm_cvtsi64_ss: ; X64: # BB#0: -; X64-NEXT: cvtsi2ssq %rdi, %xmm1 -; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; X64-NEXT: cvtsi2ssq %rdi, %xmm0 ; X64-NEXT: retq - %cvt = sitofp i64 %a1 to float - %res = insertelement <4 x float> %a0, float %cvt, i32 0 + %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ret <4 x float> %res } +declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone define i64 @test_mm_cvtss_si64(<4 x float> %a0) nounwind { ; X64-LABEL: test_mm_cvtss_si64: @@ -29,7 +28,7 @@ define i64 @test_mm_cvttss_si64(<4 x float> %a0) nounwind { ; X64: # BB#0: ; X64-NEXT: cvttss2si %xmm0, %rax ; X64-NEXT: retq - %cvt = extractelement <4 x float> %a0, i32 0 - %res = fptosi float %cvt to i64 + %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ret i64 %res } +declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index 090ddfdfa93a..4715b7f00fcb 100644 --- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -707,20 +707,17 @@ declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind { ; X32-LABEL: test_mm_cvtsi32_ss: ; X32: # BB#0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: cvtsi2ssl %eax, %xmm1 -; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; X32-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cvtsi32_ss: ; X64: # BB#0: -; X64-NEXT: cvtsi2ssl %edi, %xmm1 -; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; X64-NEXT: cvtsi2ssl %edi, %xmm0 ; X64-NEXT: retq - %cvt = sitofp i32 %a1 to float - %res = insertelement <4 x float> %a0, float %cvt, i32 0 + %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1) ret <4 x float> %res } +declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind { ; X32-LABEL: test_mm_cvtss_f32: @@ -762,10 +759,10 @@ define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind { ; X64: # BB#0: ; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: retq - %cvt = extractelement <4 x float> %a0, i32 0 - %res = fptosi float %cvt to i32 + %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ret i32 %res } +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind { ; X32-LABEL: test_mm_cvttss_si32: @@ -777,8 +774,7 @@ define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind { ; X64: # BB#0: ; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: retq - %cvt = extractelement <4 x float> %a0, i32 0 - %res = fptosi float %cvt to i32 + %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ret i32 %res } diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel-x86_64.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel-x86_64.ll index f5ecfa444d86..6b9dc40a59e2 100644 --- a/test/CodeGen/X86/sse2-intrinsics-fast-isel-x86_64.ll +++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel-x86_64.ll @@ -25,13 +25,12 @@ define i64 @test_mm_cvtsi128_si64(<2 x i64> %a0) nounwind { define <2 x double> @test_mm_cvtsi64_sd(<2 x double> %a0, i64 %a1) nounwind { ; X64-LABEL: test_mm_cvtsi64_sd: ; X64: # BB#0: -; X64-NEXT: cvtsi2sdq %rdi, %xmm1 -; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; X64-NEXT: cvtsi2sdq %rdi, %xmm0 ; X64-NEXT: retq - %cvt = sitofp i64 %a1 to double - %res = insertelement <2 x double> %a0, double %cvt, i32 0 + %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ret <2 x double> %res } +declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone define <2 x i64> @test_mm_cvtsi64_si128(i64 %a0) nounwind { ; X64-LABEL: test_mm_cvtsi64_si128: @@ -48,10 +47,10 @@ define i64 @test_mm_cvttsd_si64(<2 x double> %a0) nounwind { ; X64: # BB#0: ; X64-NEXT: cvttsd2si %xmm0, %rax ; X64-NEXT: retq - %ext = extractelement <2 x double> %a0, i32 0 - %res = fptosi double %ext to i64 + %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ret i64 %res } +declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone define <2 x i64> @test_mm_loadu_si64(i64* %a0) nounwind { ; X64-LABEL: test_mm_loadu_si64: diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index fa71325d7d6e..d3ebba93c769 100644 --- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -1208,6 +1208,39 @@ define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind { } declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone +define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) { +; X32-LABEL: test_mm_cvtsd_ss: +; X32: # BB#0: +; X32-NEXT: cvtsd2ss %xmm1, %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm_cvtsd_ss: +; X64: # BB#0: +; X64-NEXT: cvtsd2ss %xmm1, %xmm0 +; X64-NEXT: retq + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone + +define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) { +; X32-LABEL: test_mm_cvtsd_ss_load: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movaps (%eax), %xmm1 +; X32-NEXT: cvtsd2ss %xmm1, %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm_cvtsd_ss_load: +; X64: # BB#0: +; X64-NEXT: movaps (%rdi), %xmm1 +; X64-NEXT: cvtsd2ss %xmm1, %xmm0 +; X64-NEXT: retq + %a1 = load <2 x double>, <2 x double>* %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) + ret <4 x float> %res +} + define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind { ; X32-LABEL: test_mm_cvtsi128_si32: ; X32: # BB#0: @@ -1303,10 +1336,11 @@ define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind { ; X64: # BB#0: ; X64-NEXT: cvttps2dq %xmm0, %xmm0 ; X64-NEXT: retq - %res = fptosi <4 x float> %a0 to <4 x i32> + %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) %bc = bitcast <4 x i32> %res to <2 x i64> ret <2 x i64> %bc } +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind { ; X32-LABEL: test_mm_cvttsd_si32: @@ -1318,10 +1352,10 @@ define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind { ; X64: # BB#0: ; X64-NEXT: cvttsd2si %xmm0, %eax ; X64-NEXT: retq - %ext = extractelement <2 x double> %a0, i32 0 - %res = fptosi double %ext to i32 + %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ret i32 %res } +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_div_pd: diff --git a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll index ae6626bb0dc5..27a3fce0be2a 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -66,17 +66,6 @@ define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) { declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone -define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { -; CHECK-LABEL: test_x86_sse2_cvttps2dq: -; CHECK: ## BB#0: -; CHECK-NEXT: cvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: retl - %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone - - define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_x86_sse2_storel_dq: ; CHECK: ## BB#0: @@ -94,7 +83,7 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_x86_sse2_storeu_dq: ; CHECK: ## BB#0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: paddb LCPI8_0, %xmm0 +; CHECK-NEXT: paddb LCPI7_0, %xmm0 ; CHECK-NEXT: movdqu %xmm0, (%eax) ; CHECK-NEXT: retl %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll index 617e30e4b92c..3ae3aecabaf5 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL @@ -274,6 +274,25 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone +define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) { +; SSE-LABEL: test_x86_sse2_cvtsd2ss_load: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: movaps (%eax), %xmm1 +; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtsd2ss_load: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 +; KNL-NEXT: retl + %a1 = load <2 x double>, <2 x double>* %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} + + define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) { ; SSE-LABEL: test_x86_sse2_cvtsi2sd: ; SSE: ## BB#0: @@ -306,6 +325,25 @@ define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) { declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone +define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) { +; SSE-LABEL: test_x86_sse2_cvtss2sd_load: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: movaps (%eax), %xmm1 +; SSE-NEXT: cvtss2sd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtss2sd_load: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 +; KNL-NEXT: retl + %a1 = load <4 x float>, <4 x float>* %p1 + %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} + + define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { ; SSE-LABEL: test_x86_sse2_cvttpd2dq: ; SSE: ## BB#0: @@ -322,6 +360,22 @@ define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone +define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { +; SSE-LABEL: test_x86_sse2_cvttps2dq: +; SSE: ## BB#0: +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvttps2dq: +; KNL: ## BB#0: +; KNL-NEXT: vcvttps2dq %xmm0, %xmm0 +; KNL-NEXT: retl + %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + + define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) { ; SSE-LABEL: test_x86_sse2_cvttsd2si: ; SSE: ## BB#0: diff --git a/test/CodeGen/X86/tail-merge-after-mbp.ll b/test/CodeGen/X86/tail-merge-after-mbp.ll new file mode 100644 index 000000000000..dc5f3a12bd91 --- /dev/null +++ b/test/CodeGen/X86/tail-merge-after-mbp.ll @@ -0,0 +1,94 @@ +; RUN: llc -mtriple=x86_64-linux -o - %s | FileCheck %s + +%0 = type { %1, %3* } +%1 = type { %2* } +%2 = type { %2*, i8* } +%3 = type { i32, i32 (i32, i32)* } + + +declare i32 @Up(...) +declare i32 @f(i32, i32) + +; check loop block_14 is not merged with block_21 +; check loop block_11 is not merged with block_18, block_25 +define i32 @foo(%0* nocapture readonly, i32, i1 %c, i8* %p1, %2** %p2) { +; CHECK-LABEL: foo: +; CHECK: # %block_11 +; CHECK-NEXT: movq (%r14), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je +; CHECK-NEXT:# %block_14 +; CHECK-NEXT: cmpq $0, 8(%rax) +; CHECK-NEXT: jne +; CHECK-NEXT:# %block_18 +; CHECK-NEXT: movq (%r14), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je +; CHECK-NEXT:# %block_21 +; CHECK-NEXT:# =>This Inner Loop Header +; CHECK-NEXT: cmpq $0, 8(%rax) +; CHECK-NEXT: jne +; CHECK-NEXT:# %block_25 +; CHECK-NEXT:# in Loop +; CHECK-NEXT: movq (%r14), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: jne + br i1 %c, label %block_34, label %block_3 + +block_3: ; preds = %2 + br i1 %c, label %block_7, label %block_4 + +block_4: ; preds = %block_3 + %a5 = tail call i32 @f(i32 undef, i32 undef) + %a6 = icmp eq i32 %a5, 0 + br i1 %a6, label %block_7, label %block_34 + +block_7: ; preds = %block_4, %block_3 + %a8 = icmp eq %2* null, null + br i1 %a8, label %block_34, label %block_9 + +block_9: ; preds = %block_7 + %a10 = icmp eq i8* %p1, null + br i1 %a10, label %block_11, label %block_32 + +block_11: ; preds = %block_9 + %a12 = load %2*, %2** %p2, align 8 + %a13 = icmp eq %2* %a12, null + br i1 %a13, label %block_34, label %block_14 + +block_14: ; preds = %block_11 + %a15 = getelementptr inbounds %2, %2* %a12, i64 0, i32 1 + %a16 = load i8*, i8** %a15, align 8 + %a17 = icmp eq i8* %a16, null + br i1 %a17, label %block_18, label %block_32 + +block_18: ; preds = %block_14 + %a19 = load %2*, %2** %p2, align 8 + %a20 = icmp eq %2* %a19, null + br i1 %a20, label %block_34, label %block_21 + +block_21: ; preds = %block_18 + %a22 = getelementptr inbounds %2, %2* %a19, i64 0, i32 1 + %a23 = load i8*, i8** %a22, align 8 + %a24 = icmp eq i8* %a23, null + br i1 %a24, label %block_25, label %block_32 + +block_25: ; preds = %block_28, %block_21 + %a26 = load %2*, %2** %p2, align 8 + %a27 = icmp eq %2* %a26, null + br i1 %a27, label %block_34, label %block_28 + +block_28: ; preds = %block_25 + %a29 = getelementptr inbounds %2, %2* %a26, i64 0, i32 1 + %a30 = load i8*, i8** %a29, align 8 + %a31 = icmp eq i8* %a30, null + br i1 %a31, label %block_25, label %block_32 + +block_32: ; preds = %block_28, %block_21, %block_14, %block_9 + %a33 = tail call i32 (...) @Up() + br label %block_34 + +block_34: ; preds = %block_32, %block_25, %block_18, %block_11, %block_7, %block_4, %2 + %a35 = phi i32 [ 0, %2 ], [ %a5, %block_4 ], [ 0, %block_7 ], [ 0, %block_11 ], [ 0, %block_32 ], [ 0, %block_18 ], [ 0, %block_25 ] + ret i32 %a35 +} diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll index 5779cf33ac84..2944b17c6c16 100644 --- a/test/CodeGen/X86/twoaddr-lea.ll +++ b/test/CodeGen/X86/twoaddr-lea.ll @@ -44,3 +44,60 @@ entry: %0 = shl i64 %x, 1 ret i64 %0 } + +@global = external global i32, align 4 +@global2 = external global i64, align 8 + +; Test that liveness is properly updated and we do not encounter the +; assert/crash from http://llvm.org/PR28301 +; CHECK-LABEL: ham +define void @ham() { +bb: + br label %bb1 + +bb1: + %tmp = phi i64 [ %tmp40, %bb9 ], [ 0, %bb ] + %tmp2 = phi i32 [ %tmp39, %bb9 ], [ 0, %bb ] + %tmp3 = icmp sgt i32 undef, 10 + br i1 %tmp3, label %bb2, label %bb3 + +bb2: + %tmp6 = load i32, i32* @global, align 4 + %tmp8 = add nsw i32 %tmp6, %tmp2 + %tmp9 = sext i32 %tmp8 to i64 + br label %bb6 + +bb3: +; CHECK: subl %e[[REG0:[a-z0-9]+]], +; CHECK: leaq 4({{%[a-z0-9]+}}), %r[[REG0]] + %tmp14 = phi i64 [ %tmp15, %bb5 ], [ 0, %bb1 ] + %tmp15 = add nuw i64 %tmp14, 4 + %tmp16 = trunc i64 %tmp14 to i32 + %tmp17 = sub i32 %tmp2, %tmp16 + br label %bb4 + +bb4: + %tmp20 = phi i64 [ %tmp14, %bb3 ], [ %tmp34, %bb5 ] + %tmp28 = icmp eq i32 %tmp17, 0 + br i1 %tmp28, label %bb5, label %bb8 + +bb5: + %tmp34 = add nuw nsw i64 %tmp20, 1 + %tmp35 = icmp slt i64 %tmp34, %tmp15 + br i1 %tmp35, label %bb4, label %bb3 + +bb6: + store volatile i64 %tmp, i64* @global2, align 8 + store volatile i64 %tmp9, i64* @global2, align 8 + store volatile i32 %tmp6, i32* @global, align 4 + %tmp45 = icmp slt i32 undef, undef + br i1 %tmp45, label %bb6, label %bb9 + +bb8: + unreachable + +bb9: + %tmp39 = add nuw nsw i32 %tmp2, 4 + %tmp40 = add nuw i64 %tmp, 4 + br label %bb1 +} |
