diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-20 21:19:10 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-20 21:19:10 +0000 |
| commit | d99dafe2e4a385dd2a6c76da6d8258deb100657b (patch) | |
| tree | ba60bf957558bd114f25dbff3d4996b5d7a61c82 /test/Transforms/InstCombine | |
| parent | 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (diff) | |
Notes
Diffstat (limited to 'test/Transforms/InstCombine')
| -rw-r--r-- | test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll | 15 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll | 1196 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/call-cast-attrs.ll | 29 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/constant-fold-math.ll | 18 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/div-shift.ll | 15 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/div.ll | 39 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/pr32686.ll | 23 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/rem.ll | 22 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/shift.ll | 20 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/vector-casts.ll | 6 |
10 files changed, 1354 insertions, 29 deletions
diff --git a/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll b/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll deleted file mode 100644 index 510a68c3437e..000000000000 --- a/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep bitcast | count 2 - -define signext i32 @b(i32* inreg %x) { - ret i32 0 -} - -define void @c(...) { - ret void -} - -define void @g(i32* %y) { - call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)( i32 zeroext 0 ) ; <i32>:2 [#uses=0] - call void bitcast (void (...)* @c to void (i32*)*)( i32* sret null ) - ret void -} diff --git a/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll b/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll index 888f51bf939d..0c4842c15988 100644 --- a/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll +++ b/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll @@ -227,6 +227,12 @@ define amdgpu_ps float @preserve_metadata_extract_elt0_buffer_load_v2f32(<4 x i3 ret float %elt0 } +declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1 +declare <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32>, i32, i32, i1, i1) #1 +declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #1 +declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1 + ; -------------------------------------------------------------------- ; llvm.amdgcn.buffer.load.format ; -------------------------------------------------------------------- @@ -304,18 +310,1196 @@ define i16 @extract_lo16_0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 { ret i16 %tmp2 } -declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1 -declare <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32>, i32, i32, i1, i1) #1 -declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #1 -declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1 - declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1 declare <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32>, i32, i32, i1, i1) #1 declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1 declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #1 declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v4f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v2f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; FIXME: Should really fold to undef +; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; FIXME: Should really fold to undef +; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 2, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 2, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; FIXME: Should really fold to undef +; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 4, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 4, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; FIXME: Should really fold to undef +; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 8, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 8, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 9, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret <2 x float> %data +define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) + %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> + ret <2 x float> %shuf +} + +; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret <2 x float> %data +define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> + ret <2 x float> %shuf +} + +; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret <2 x float> %data +define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false) + %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> + ret <2 x float> %shuf +} + +; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret <2 x float> %data +define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false) + %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> + ret <2 x float> %shuf +} + +; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> +; CHECK-NEXT: ret <3 x float> %shuf +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) + %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> + ret <3 x float> %shuf +} + +; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> +; CHECK-NEXT: ret <3 x float> %shuf +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> + ret <3 x float> %shuf +} + +; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> +; CHECK-NEXT: ret <3 x float> %shuf +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false) + %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> + ret <3 x float> %shuf +} + +; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> +; CHECK-NEXT: ret <3 x float> %shuf +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false) + %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> + ret <3 x float> %shuf +} + +declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.d +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_d_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_d_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.d.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_d_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_d_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.l +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_l_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.b +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_b_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.b.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_b_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.lz +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_lz_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.cd +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_cd_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_cd_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.cd.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_cd_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v4f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v2f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.d +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_d_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_d_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.d.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.l +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_l_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.b +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_b_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.b.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.lz +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_lz_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.cd +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_cd_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_cd_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.cd.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v4f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v2f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.d.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_d_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_d_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.d.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.l.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_l_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.b.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_b_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.b.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.lz.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_lz_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.cd.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_cd_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_cd_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.cd.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v4f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v2f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.d.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_d_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.d.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.l.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.b.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.b.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.lz.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.cd.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample.c.cd.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4 +; -------------------------------------------------------------------- + +; Don't handle gather4* + +; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v4f32_v8i32( +; CHECK: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i3 +define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v4f32_v4i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v2f32_v4i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.l +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_l_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.b +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_b_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.b.cl +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.lz +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_lz_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v4f32_v4i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v2f32_v4i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.l.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_l_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.b.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_b_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.b.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.lz.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.c.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v4f32_v4i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v2f32_v4i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.c.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.c.l.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.c.b.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.c.b.cl.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.gather4.c.lz.o +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) +define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.getlod +; -------------------------------------------------------------------- + +; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v4f32_v8i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v4f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v2f32_v4i32( +; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret float %data +define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { + %data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + ret float %elt0 +} + +declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + attributes #0 = { nounwind } attributes #1 = { nounwind readonly } diff --git a/test/Transforms/InstCombine/call-cast-attrs.ll b/test/Transforms/InstCombine/call-cast-attrs.ll new file mode 100644 index 000000000000..ddaf90c3e74f --- /dev/null +++ b/test/Transforms/InstCombine/call-cast-attrs.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +define signext i32 @b(i32* inreg %x) { + ret i32 0 +} + +define void @c(...) { + ret void +} + +declare void @useit(i32) + +define void @d(i32 %x, ...) { + call void @useit(i32 %x) + ret void +} + +define void @g(i32* %y) { + call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)(i32 zeroext 0) + call void bitcast (void (...)* @c to void (i32*)*)(i32* %y) + call void bitcast (void (...)* @c to void (i32*)*)(i32* sret %y) + call void bitcast (void (i32, ...)* @d to void (i32, i32*)*)(i32 0, i32* sret %y) + ret void +} +; CHECK-LABEL: define void @g(i32* %y) +; CHECK: call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)(i32 zeroext 0) +; CHECK: call void (...) @c(i32* %y) +; CHECK: call void bitcast (void (...)* @c to void (i32*)*)(i32* sret %y) +; CHECK: call void bitcast (void (i32, ...)* @d to void (i32, i32*)*)(i32 0, i32* sret %y) diff --git a/test/Transforms/InstCombine/constant-fold-math.ll b/test/Transforms/InstCombine/constant-fold-math.ll index 50cd6070896e..27578387f827 100644 --- a/test/Transforms/InstCombine/constant-fold-math.ll +++ b/test/Transforms/InstCombine/constant-fold-math.ll @@ -45,4 +45,22 @@ define double @constant_fold_fmuladd_f64() #0 { ret double %x } +; PR32177 + +; CHECK-LABEL: @constant_fold_frem_f32 +; CHECK-NEXT: ret float 0x41A61B2000000000 +define float @constant_fold_frem_f32() #0 { + %x = frem float 0x43cbfcd960000000, 0xc1e2b34a00000000 + ret float %x +} + +; PR3316 + +; CHECK-LABEL: @constant_fold_frem_f64 +; CHECK-NEXT: ret double 0.000000e+00 +define double @constant_fold_frem_f64() { + %x = frem double 0x43E0000000000000, 1.000000e+00 + ret double %x +} + attributes #0 = { nounwind readnone } diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll index 517313ed8e4e..b5a65048fda0 100644 --- a/test/Transforms/InstCombine/div-shift.ll +++ b/test/Transforms/InstCombine/div-shift.ll @@ -16,6 +16,21 @@ entry: ret i32 %d } +define <2 x i32> @t1vec(<2 x i16> %x, <2 x i32> %y) { +; CHECK-LABEL: @t1vec( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[Y:%.*]], <i32 1, i32 1> +; CHECK-NEXT: [[D:%.*]] = lshr <2 x i32> [[CONV]], [[TMP0]] +; CHECK-NEXT: ret <2 x i32> [[D]] +; +entry: + %conv = zext <2 x i16> %x to <2 x i32> + %s = shl <2 x i32> <i32 2, i32 2>, %y + %d = sdiv <2 x i32> %conv, %s + ret <2 x i32> %d +} + ; rdar://11721329 define i64 @t2(i64 %x, i32 %y) { ; CHECK-LABEL: @t2( diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll index a037607267ac..796fce020fd3 100644 --- a/test/Transforms/InstCombine/div.ll +++ b/test/Transforms/InstCombine/div.ll @@ -225,6 +225,16 @@ define i32 @test19(i32 %x) { ret i32 %A } +define <2 x i32> @test19vec(<2 x i32> %x) { +; CHECK-LABEL: @test19vec( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 1, i32 1> +; CHECK-NEXT: [[A:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[A]] +; + %A = udiv <2 x i32> <i32 1, i32 1>, %x + ret <2 x i32> %A +} + define i32 @test20(i32 %x) { ; CHECK-LABEL: @test20( ; CHECK-NEXT: [[TMP1:%.*]] = add i32 %x, 1 @@ -236,6 +246,17 @@ define i32 @test20(i32 %x) { ret i32 %A } +define <2 x i32> @test20vec(<2 x i32> %x) { +; CHECK-LABEL: @test20vec( +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 1, i32 1> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 3, i32 3> +; CHECK-NEXT: [[A:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[X]], <2 x i32> zeroinitializer +; CHECK-NEXT: ret <2 x i32> [[A]] +; + %A = sdiv <2 x i32> <i32 1, i32 1>, %x + ret <2 x i32> %A +} + define i32 @test21(i32 %a) { ; CHECK-LABEL: @test21( ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 %a, 3 @@ -388,6 +409,17 @@ define i32 @test35(i32 %A) { ret i32 %mul } +define <2 x i32> @test35vec(<2 x i32> %A) { +; CHECK-LABEL: @test35vec( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647> +; CHECK-NEXT: [[MUL:%.*]] = udiv exact <2 x i32> [[AND]], <i32 2147483647, i32 2147483647> +; CHECK-NEXT: ret <2 x i32> [[MUL]] +; + %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647> + %mul = sdiv exact <2 x i32> %and, <i32 2147483647, i32 2147483647> + ret <2 x i32> %mul +} + define i32 @test36(i32 %A) { ; CHECK-LABEL: @test36( ; CHECK-NEXT: [[AND:%.*]] = and i32 %A, 2147483647 @@ -400,13 +432,10 @@ define i32 @test36(i32 %A) { ret i32 %mul } -; FIXME: Vector should get same transform as scalar. - define <2 x i32> @test36vec(<2 x i32> %A) { ; CHECK-LABEL: @test36vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> %A, <i32 2147483647, i32 2147483647> -; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw <2 x i32> <i32 1, i32 1>, %A -; CHECK-NEXT: [[MUL:%.*]] = sdiv exact <2 x i32> [[AND]], [[SHL]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647> +; CHECK-NEXT: [[MUL:%.*]] = lshr exact <2 x i32> [[AND]], [[A]] ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647> diff --git a/test/Transforms/InstCombine/pr32686.ll b/test/Transforms/InstCombine/pr32686.ll new file mode 100644 index 000000000000..b2d2aff2fde8 --- /dev/null +++ b/test/Transforms/InstCombine/pr32686.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine %s | FileCheck %s + +@a = external global i8 +@b = external global i32 + +define void @tinkywinky() { +; CHECK-LABEL: @tinkywinky( +; CHECK-NEXT: [[PATATINO:%.*]] = load i8, i8* @a, align 1 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[PATATINO]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TOBOOL]] to i32 +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[TMP1]], or (i32 zext (i1 icmp ne (i32* bitcast (i8* @a to i32*), i32* @b) to i32), i32 2) +; CHECK-NEXT: store i32 [[OR1]], i32* @b, align 4 +; CHECK-NEXT: ret void +; + %patatino = load i8, i8* @a + %tobool = icmp ne i8 %patatino, 0 + %lnot = xor i1 %tobool, true + %lnot.ext = zext i1 %lnot to i32 + %or = or i32 xor (i32 zext (i1 icmp ne (i32* bitcast (i8* @a to i32*), i32* @b) to i32), i32 2), %lnot.ext + store i32 %or, i32* @b, align 4 + ret void +} diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll index 7a7a134db9c5..86a3580189fd 100644 --- a/test/Transforms/InstCombine/rem.ll +++ b/test/Transforms/InstCombine/rem.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define i64 @rem_signed(i64 %x1, i64 %y2) { @@ -571,3 +572,24 @@ rem.is.unsafe: ret i32 0 } +define i32 @test22(i32 %A) { +; CHECK-LABEL: @test22( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 2147483647 +; CHECK-NEXT: [[MUL:%.*]] = urem i32 [[AND]], 2147483647 +; CHECK-NEXT: ret i32 [[MUL]] +; + %and = and i32 %A, 2147483647 + %mul = srem i32 %and, 2147483647 + ret i32 %mul +} + +define <2 x i32> @test23(<2 x i32> %A) { +; CHECK-LABEL: @test23( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647> +; CHECK-NEXT: [[MUL:%.*]] = urem <2 x i32> [[AND]], <i32 2147483647, i32 2147483647> +; CHECK-NEXT: ret <2 x i32> [[MUL]] +; + %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647> + %mul = srem <2 x i32> %and, <i32 2147483647, i32 2147483647> + ret <2 x i32> %mul +} diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll index 60ba35557f70..d5f489280a03 100644 --- a/test/Transforms/InstCombine/shift.ll +++ b/test/Transforms/InstCombine/shift.ll @@ -1268,3 +1268,23 @@ define <2 x i64> @test_64_splat_vec(<2 x i32> %t) { ret <2 x i64> %shl } +define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) { +; CHECK-LABEL: @ashr_demanded_bits_splat( +; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> %x, <i8 7, i8 7> +; CHECK-NEXT: ret <2 x i8> [[SHR]] +; + %and = and <2 x i8> %x, <i8 128, i8 128> + %shr = ashr <2 x i8> %and, <i8 7, i8 7> + ret <2 x i8> %shr +} + +define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) { +; CHECK-LABEL: @lshr_demanded_bits_splat( +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> %x, <i8 7, i8 7> +; CHECK-NEXT: ret <2 x i8> [[SHR]] +; + %and = and <2 x i8> %x, <i8 128, i8 128> + %shr = lshr <2 x i8> %and, <i8 7, i8 7> + ret <2 x i8> %shr +} + diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll index 643ab6c5348f..2197c250ace2 100644 --- a/test/Transforms/InstCombine/vector-casts.ll +++ b/test/Transforms/InstCombine/vector-casts.ll @@ -15,9 +15,9 @@ define <2 x i1> @test1(<2 x i64> %a) { ; The ashr turns into an lshr. define <2 x i64> @test2(<2 x i64> %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[B:%.*]] = and <2 x i64> %a, <i64 65535, i64 65535> -; CHECK-NEXT: [[T:%.*]] = lshr <2 x i64> [[B]], <i64 1, i64 1> -; CHECK-NEXT: ret <2 x i64> [[T]] +; CHECK-NEXT: [[B:%.*]] = and <2 x i64> %a, <i64 65534, i64 65534> +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <2 x i64> [[B]], <i64 1, i64 1> +; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %b = and <2 x i64> %a, <i64 65535, i64 65535> %t = ashr <2 x i64> %b, <i64 1, i64 1> |
