aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2015-09-06 18:34:38 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2015-09-06 18:34:38 +0000
commit 69156b4c20249e7800cc09e0eef0beb3d15ac1ad (patch)
tree 461d3cf041290f4a99740d540bf0973d6084f98e /test
parent ee8648bdac07986a0f1ec897b02ec82a2f144d46 (diff)
Diffstat (limited to 'test')
-rw-r--r--test/Analysis/BasicAA/gep-alias.ll48
-rw-r--r--test/Analysis/BasicAA/phi-aa.ll1
-rw-r--r--test/Analysis/BasicAA/zext.ll209
-rw-r--r--test/CodeGen/AMDGPU/cgp-addressing-modes.ll16
-rw-r--r--test/CodeGen/AMDGPU/global_atomics.ll280
-rw-r--r--test/CodeGen/AMDGPU/gv-const-addrspace.ll12
-rw-r--r--test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll12
-rw-r--r--test/CodeGen/AMDGPU/private-memory.ll2
-rw-r--r--test/CodeGen/AMDGPU/scratch-buffer.ll51
-rw-r--r--test/CodeGen/AMDGPU/smrd.ll8
-rw-r--r--test/CodeGen/ARM/ldrd.ll56
-rw-r--r--test/CodeGen/Mips/Fast-ISel/br1.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/bswap1.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/callabi.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/constexpr-address.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/div1.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fastalloca.ll2
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fastcc-miss.ll15
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fpcmpa.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fpext.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fpintconv.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fptrunc.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/icmpa.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/loadstore2.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll8
-rw-r--r--test/CodeGen/Mips/Fast-ISel/loadstrconst.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/logopm.ll24
-rw-r--r--test/CodeGen/Mips/Fast-ISel/memtest1.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/mul1.ll6
-rw-r--r--test/CodeGen/Mips/Fast-ISel/nullvoid.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/overflt.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/rem1.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/retabi.ll2
-rw-r--r--test/CodeGen/Mips/Fast-ISel/sel1.ll20
-rw-r--r--test/CodeGen/Mips/Fast-ISel/shftopm.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/shift.ll2
-rw-r--r--test/CodeGen/Mips/Fast-ISel/simplestore.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll8
-rw-r--r--test/CodeGen/Mips/Fast-ISel/simplestorei.ll11
-rw-r--r--test/CodeGen/Mips/delay-slot-kill.ll2
-rw-r--r--test/CodeGen/Mips/emergency-spill-slot-near-fp.ll10
-rw-r--r--test/CodeGen/Mips/llvm-ir/and.ll5
-rw-r--r--test/CodeGen/Mips/llvm-ir/or.ll6
-rw-r--r--test/CodeGen/Mips/llvm-ir/xor.ll5
-rw-r--r--test/CodeGen/PowerPC/fp2int2fp-ppcfp128.ll16
-rw-r--r--test/CodeGen/PowerPC/ppc64-patchpoint.ll15
-rw-r--r--test/CodeGen/PowerPC/pr24216.ll14
-rw-r--r--test/CodeGen/PowerPC/vec_shuffle_le.ll2
-rw-r--r--test/CodeGen/PowerPC/vsx.ll69
-rw-r--r--test/CodeGen/PowerPC/vsx_insert_extract_le.ll8
-rw-r--r--test/CodeGen/PowerPC/xvcmpeqdp-v2f64.ll38
-rw-r--r--test/CodeGen/SystemZ/args-04.ll14
-rw-r--r--test/CodeGen/SystemZ/args-07.ll60
-rw-r--r--test/CodeGen/SystemZ/args-08.ll57
-rw-r--r--test/CodeGen/SystemZ/vec-args-06.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-args-07.ll47
-rw-r--r--test/CodeGen/X86/fdiv-combine.ll19
-rw-r--r--test/CodeGen/X86/machine-trace-metrics-crash.ll62
-rw-r--r--test/CodeGen/X86/pr2656.ll32
-rw-r--r--test/CodeGen/X86/sse-fcopysign.ll32
-rw-r--r--test/CodeGen/X86/vec_fabs.ll4
-rw-r--r--test/DebugInfo/Mips/delay-slot.ll14
-rw-r--r--test/MC/AMDGPU/vopc.s26
-rw-r--r--test/MC/Disassembler/PowerPC/ppc64le-encoding.txt664
-rw-r--r--test/MC/X86/intel-syntax.s14
-rw-r--r--test/Object/archive-extract.test2
-rw-r--r--test/Transforms/GVN/pr24397.ll18
-rw-r--r--test/Transforms/InstCombine/pr24354.ll33
-rw-r--r--test/Transforms/InstCombine/vector-casts.ll11
-rw-r--r--test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll10
-rw-r--r--test/Transforms/SROA/basictest.ll10
-rw-r--r--test/Transforms/SROA/big-endian.ll123
-rw-r--r--test/Transforms/SROA/phi-and-select.ll18
-rw-r--r--test/Transforms/Scalarizer/cache-bug.ll30
74 files changed, 1886 insertions, 552 deletions
diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll
index f686010f9ead..1e435af2f12f 100644
--- a/test/Analysis/BasicAA/gep-alias.ll
+++ b/test/Analysis/BasicAA/gep-alias.ll
@@ -228,3 +228,51 @@ define i32 @test12(i32 %x, i32 %y, i8* %p) nounwind {
; CHECK-LABEL: @test12(
; CHECK: ret i32 %r
}
+
+@P = internal global i32 715827882, align 4
+@Q = internal global i32 715827883, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%u %u\0A\00", align 1
+
+; Make sure we recognize that u[0] and u[Global + Cst] may alias
+; when the addition has wrapping semantics.
+; PR24468.
+; CHECK-LABEL: @test13(
+; Make sure the stores appear before the related loads.
+; CHECK: store i8 42,
+; CHECK: store i8 99,
+; Find the loads and make sure they are used in the arguments to the printf.
+; CHECK: [[T0ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds [3 x i8], [3 x i8]* %t, i32 0, i32 0
+; CHECK: [[T0:%[a-zA-Z0-9_]+]] = load i8, i8* [[T0ADDR]], align 1
+; CHECK: [[T0ARG:%[a-zA-Z0-9_]+]] = zext i8 [[T0]] to i32
+; CHECK: [[U0ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds [3 x i8], [3 x i8]* %u, i32 0, i32 0
+; CHECK: [[U0:%[a-zA-Z0-9_]+]] = load i8, i8* [[U0ADDR]], align 1
+; CHECK: [[U0ARG:%[a-zA-Z0-9_]+]] = zext i8 [[U0]] to i32
+; CHECK: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 [[T0ARG]], i32 [[U0ARG]])
+; CHECK: ret
+define void @test13() {
+entry:
+ %t = alloca [3 x i8], align 1
+ %u = alloca [3 x i8], align 1
+ %tmp = load i32, i32* @P, align 4
+ %tmp1 = mul i32 %tmp, 3
+ %mul = add i32 %tmp1, -2147483646
+ %idxprom = zext i32 %mul to i64
+ %arrayidx = getelementptr inbounds [3 x i8], [3 x i8]* %t, i64 0, i64 %idxprom
+ store i8 42, i8* %arrayidx, align 1
+ %tmp2 = load i32, i32* @Q, align 4
+ %tmp3 = mul i32 %tmp2, 3
+ %mul2 = add i32 %tmp3, 2147483647
+ %idxprom3 = zext i32 %mul2 to i64
+ %arrayidx4 = getelementptr inbounds [3 x i8], [3 x i8]* %u, i64 0, i64 %idxprom3
+ store i8 99, i8* %arrayidx4, align 1
+ %arrayidx5 = getelementptr inbounds [3 x i8], [3 x i8]* %t, i64 0, i64 0
+ %tmp4 = load i8, i8* %arrayidx5, align 1
+ %conv = zext i8 %tmp4 to i32
+ %arrayidx6 = getelementptr inbounds [3 x i8], [3 x i8]* %u, i64 0, i64 0
+ %tmp5 = load i8, i8* %arrayidx6, align 1
+ %conv7 = zext i8 %tmp5 to i32
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), i32 %conv, i32 %conv7)
+ ret void
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
index 3944e9e43566..a72778277bb2 100644
--- a/test/Analysis/BasicAA/phi-aa.ll
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -39,7 +39,6 @@ return:
; CHECK-LABEL: pr18068
; CHECK: MayAlias: i32* %0, i32* %arrayidx5
-; CHECK: NoAlias: i32* %arrayidx13, i32* %arrayidx5
define i32 @pr18068(i32* %jj7, i32* %j) {
entry:
diff --git a/test/Analysis/BasicAA/zext.ll b/test/Analysis/BasicAA/zext.ll
deleted file mode 100644
index ed3565640251..000000000000
--- a/test/Analysis/BasicAA/zext.ll
+++ /dev/null
@@ -1,209 +0,0 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; CHECK-LABEL: test_with_zext
-; CHECK: NoAlias: i8* %a, i8* %b
-
-define void @test_with_zext() {
- %1 = tail call i8* @malloc(i64 120)
- %a = getelementptr inbounds i8, i8* %1, i64 8
- %2 = getelementptr inbounds i8, i8* %1, i64 16
- %3 = zext i32 3 to i64
- %b = getelementptr inbounds i8, i8* %2, i64 %3
- ret void
-}
-
-; CHECK-LABEL: test_with_lshr
-; CHECK: NoAlias: i8* %a, i8* %b
-
-define void @test_with_lshr(i64 %i) {
- %1 = tail call i8* @malloc(i64 120)
- %a = getelementptr inbounds i8, i8* %1, i64 8
- %2 = getelementptr inbounds i8, i8* %1, i64 16
- %3 = lshr i64 %i, 2
- %b = getelementptr inbounds i8, i8* %2, i64 %3
- ret void
-}
-
-; CHECK-LABEL: test_with_a_loop
-; CHECK: NoAlias: i8* %a, i8* %b
-
-define void @test_with_a_loop(i8* %mem) {
- br label %for.loop
-
-for.loop:
- %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
- %a = getelementptr inbounds i8, i8* %mem, i64 8
- %a.plus1 = getelementptr inbounds i8, i8* %mem, i64 16
- %i.64 = zext i32 %i to i64
- %b = getelementptr inbounds i8, i8* %a.plus1, i64 %i.64
- %i.plus1 = add nuw nsw i32 %i, 1
- %cmp = icmp eq i32 %i.plus1, 10
- br i1 %cmp, label %for.loop.exit, label %for.loop
-
-for.loop.exit:
- ret void
-}
-
-; CHECK-LABEL: test_with_varying_base_pointer_in_loop
-; CHECK: NoAlias: i8* %a, i8* %b
-
-define void @test_with_varying_base_pointer_in_loop(i8* %mem.orig) {
- br label %for.loop
-
-for.loop:
- %mem = phi i8* [ %mem.orig, %0 ], [ %mem.plus1, %for.loop ]
- %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
- %a = getelementptr inbounds i8, i8* %mem, i64 8
- %a.plus1 = getelementptr inbounds i8, i8* %mem, i64 16
- %i.64 = zext i32 %i to i64
- %b = getelementptr inbounds i8, i8* %a.plus1, i64 %i.64
- %i.plus1 = add nuw nsw i32 %i, 1
- %mem.plus1 = getelementptr inbounds i8, i8* %mem, i64 8
- %cmp = icmp eq i32 %i.plus1, 10
- br i1 %cmp, label %for.loop.exit, label %for.loop
-
-for.loop.exit:
- ret void
-}
-
-; CHECK-LABEL: test_sign_extension
-; CHECK: PartialAlias: i64* %b.i64, i8* %a
-
-define void @test_sign_extension(i32 %p) {
- %1 = tail call i8* @malloc(i64 120)
- %p.64 = zext i32 %p to i64
- %a = getelementptr inbounds i8, i8* %1, i64 %p.64
- %p.minus1 = add i32 %p, -1
- %p.minus1.64 = zext i32 %p.minus1 to i64
- %b.i8 = getelementptr inbounds i8, i8* %1, i64 %p.minus1.64
- %b.i64 = bitcast i8* %b.i8 to i64*
- ret void
-}
-
-; CHECK-LABEL: test_fe_tools
-; CHECK: PartialAlias: i32* %a, i32* %b
-
-define void @test_fe_tools([8 x i32]* %values) {
- br label %reorder
-
-for.loop:
- %i = phi i32 [ 0, %reorder ], [ %i.next, %for.loop ]
- %idxprom = zext i32 %i to i64
- %b = getelementptr inbounds [8 x i32], [8 x i32]* %values, i64 0, i64 %idxprom
- %i.next = add nuw nsw i32 %i, 1
- %1 = icmp eq i32 %i.next, 10
- br i1 %1, label %for.loop.exit, label %for.loop
-
-reorder:
- %a = getelementptr inbounds [8 x i32], [8 x i32]* %values, i64 0, i64 1
- br label %for.loop
-
-for.loop.exit:
- ret void
-}
-
-@b = global i32 0, align 4
-@d = global i32 0, align 4
-
-; CHECK-LABEL: test_spec2006
-; CHECK: PartialAlias: i32** %x, i32** %y
-
-define void @test_spec2006() {
- %h = alloca [1 x [2 x i32*]], align 16
- %d.val = load i32, i32* @d, align 4
- %d.promoted = sext i32 %d.val to i64
- %1 = icmp slt i32 %d.val, 2
- br i1 %1, label %.lr.ph, label %3
-
-.lr.ph: ; preds = %0
- br label %2
-
-; <label>:2 ; preds = %.lr.ph, %2
- %i = phi i32 [ %d.val, %.lr.ph ], [ %i.plus1, %2 ]
- %i.promoted = sext i32 %i to i64
- %x = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 %d.promoted, i64 %i.promoted
- %i.plus1 = add nsw i32 %i, 1
- %cmp = icmp slt i32 %i.plus1, 2
- br i1 %cmp, label %2, label %3
-
-; <label>:3 ; preds = %._crit_edge, %0
- %y = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
- ret void
-}
-
-; CHECK-LABEL: test_modulo_analysis_easy_case
-; CHECK: NoAlias: i32** %x, i32** %y
-
-define void @test_modulo_analysis_easy_case(i64 %i) {
- %h = alloca [1 x [2 x i32*]], align 16
- %x = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 %i, i64 0
- %y = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
- ret void
-}
-
-; CHECK-LABEL: test_modulo_analysis_in_loop
-; CHECK: NoAlias: i32** %x, i32** %y
-
-define void @test_modulo_analysis_in_loop() {
- %h = alloca [1 x [2 x i32*]], align 16
- br label %for.loop
-
-for.loop:
- %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
- %i.promoted = sext i32 %i to i64
- %x = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 %i.promoted, i64 0
- %y = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
- %i.plus1 = add nsw i32 %i, 1
- %cmp = icmp slt i32 %i.plus1, 2
- br i1 %cmp, label %for.loop, label %for.loop.exit
-
-for.loop.exit:
- ret void
-}
-
-; CHECK-LABEL: test_modulo_analysis_with_global
-; CHECK: PartialAlias: i32** %x, i32** %y
-
-define void @test_modulo_analysis_with_global() {
- %h = alloca [1 x [2 x i32*]], align 16
- %b = load i32, i32* @b, align 4
- %b.promoted = sext i32 %b to i64
- br label %for.loop
-
-for.loop:
- %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
- %i.promoted = sext i32 %i to i64
- %x = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 %i.promoted, i64 %b.promoted
- %y = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
- %i.plus1 = add nsw i32 %i, 1
- %cmp = icmp slt i32 %i.plus1, 2
- br i1 %cmp, label %for.loop, label %for.loop.exit
-
-for.loop.exit:
- ret void
-}
-
-; CHECK-LABEL: test_const_eval
-; CHECK: NoAlias: i8* %a, i8* %b
-define void @test_const_eval(i8* %ptr, i64 %offset) {
- %a = getelementptr inbounds i8, i8* %ptr, i64 %offset
- %a.dup = getelementptr inbounds i8, i8* %ptr, i64 %offset
- %three = zext i32 3 to i64
- %b = getelementptr inbounds i8, i8* %a.dup, i64 %three
- ret void
-}
-
-; CHECK-LABEL: test_const_eval_scaled
-; CHECK: MustAlias: i8* %a, i8* %b
-define void @test_const_eval_scaled(i8* %ptr) {
- %three = zext i32 3 to i64
- %six = mul i64 %three, 2
- %a = getelementptr inbounds i8, i8* %ptr, i64 %six
- %b = getelementptr inbounds i8, i8* %ptr, i64 6
- ret void
-}
-
-; Function Attrs: nounwind
-declare noalias i8* @malloc(i64)
diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 77f7bd01b7f0..a68d110fdc96 100644
--- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -1,12 +1,15 @@
-; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=OPT %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare i32 @llvm.r600.read.tidig.x() #0
; OPT-LABEL: @test_sink_global_small_offset_i32(
-; OPT-NOT: getelementptr i32, i32 addrspace(1)* %in
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
+; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
-; OPT: ptrtoint
+; OPT-CI: ptrtoint
; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
@@ -214,8 +217,11 @@ done:
}
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
+; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
+; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
; GCN: s_and_saveexec_b64
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^}}BB7_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
entry:
diff --git a/test/CodeGen/AMDGPU/global_atomics.ll b/test/CodeGen/AMDGPU/global_atomics.ll
index 847950f6376e..146f0a5fbf26 100644
--- a/test/CodeGen/AMDGPU/global_atomics.ll
+++ b/test/CodeGen/AMDGPU/global_atomics.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=FUNC %s
+
; FUNC-LABEL: {{^}}atomic_add_i32_offset:
-; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -10,8 +12,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
-; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -22,6 +24,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+
define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -32,7 +38,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -43,7 +52,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32:
-; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -51,8 +60,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret:
-; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -62,6 +71,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -71,7 +83,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -81,7 +96,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_offset:
-; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -90,8 +105,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
-; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -102,6 +117,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -112,7 +130,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -123,7 +144,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32:
-; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -131,8 +152,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret:
-; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -142,6 +163,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -151,7 +175,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -161,7 +188,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
-; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -170,8 +197,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
-; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -182,6 +209,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -192,7 +222,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -203,7 +236,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32:
-; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -211,8 +244,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
-; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -222,6 +255,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -231,7 +267,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -241,7 +280,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_offset:
-; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -250,8 +289,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
-; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -262,6 +301,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -272,7 +314,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -283,7 +328,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32:
-; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -291,8 +336,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret:
-; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -302,6 +347,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -311,7 +359,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -321,7 +372,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
-; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -330,8 +381,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
-; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -342,6 +393,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -352,7 +406,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -363,7 +420,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32:
-; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -371,8 +428,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
-; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -382,6 +439,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -391,7 +451,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -401,7 +464,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_offset:
-; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -410,8 +473,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
-; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -422,6 +485,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -432,7 +498,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -443,7 +512,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32:
-; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -451,8 +520,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret:
-; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -462,6 +531,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -471,7 +543,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -481,7 +556,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
-; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -490,8 +565,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
-; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -502,6 +577,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -512,7 +590,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -523,7 +604,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32:
-; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -532,7 +613,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -542,6 +623,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -551,7 +635,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -561,7 +648,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_offset:
-; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -570,8 +657,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
-; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -582,6 +669,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -592,7 +682,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -603,7 +696,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32:
-; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -611,8 +704,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret:
-; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -622,6 +715,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -631,7 +727,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -641,7 +740,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
-; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -650,8 +749,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
-; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -672,7 +771,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -683,7 +785,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32:
-; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
@@ -691,8 +793,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
-; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
@@ -702,6 +804,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -711,7 +816,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -721,7 +829,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
-; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -730,8 +838,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
-; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -742,6 +850,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -752,7 +863,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -763,7 +877,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32:
-; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
@@ -771,8 +885,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
-; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
@@ -782,6 +896,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -791,7 +908,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
diff --git a/test/CodeGen/AMDGPU/gv-const-addrspace.ll b/test/CodeGen/AMDGPU/gv-const-addrspace.ll
index 3c1fc6c98f74..d4d13125cfbf 100644
--- a/test/CodeGen/AMDGPU/gv-const-addrspace.ll
+++ b/test/CodeGen/AMDGPU/gv-const-addrspace.ll
@@ -8,9 +8,7 @@
@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
; FUNC-LABEL: {{^}}float:
-; FIXME: We should be using s_load_dword here.
-; SI: buffer_load_dword
-; VI: s_load_dword
+; GCN: s_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@@ -31,9 +29,7 @@ entry:
; FUNC-LABEL: {{^}}i32:
-; FIXME: We should be using s_load_dword here.
-; SI: buffer_load_dword
-; VI: s_load_dword
+; GCN: s_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@@ -71,9 +67,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
<1 x i32> <i32 4> ]
; FUNC-LABEL: {{^}}array_v1_gv_load:
-; FIXME: We should be using s_load_dword here.
-; SI: buffer_load_dword
-; VI: s_load_dword
+; GCN: s_load_dword
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
%load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll
index e098dd35d6da..6049dca04012 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll
@@ -11,8 +11,8 @@ declare double @llvm.AMDGPU.fract.f64(double) nounwind readnone
; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
-; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
-; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
; CI: buffer_store_dwordx2 [[FRC]]
define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
@@ -28,8 +28,8 @@ define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) nou
; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
-; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
-; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
; CI: buffer_store_dwordx2 [[FRC]]
define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
@@ -46,8 +46,8 @@ define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src)
; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
-; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
-; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
; CI: buffer_store_dwordx2 [[FRC]]
define void @fract_f64_neg_abs(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
diff --git a/test/CodeGen/AMDGPU/private-memory.ll b/test/CodeGen/AMDGPU/private-memory.ll
index 1c5629780508..645dc04f4420 100644
--- a/test/CodeGen/AMDGPU/private-memory.ll
+++ b/test/CodeGen/AMDGPU/private-memory.ll
@@ -298,7 +298,7 @@ entry:
; FUNC-LABEL: ptrtoint:
; SI-NOT: ds_write
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
-; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5
+; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%alloca = alloca [16 x i32]
%tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
diff --git a/test/CodeGen/AMDGPU/scratch-buffer.ll b/test/CodeGen/AMDGPU/scratch-buffer.ll
index 56088718ada8..268869daaa32 100644
--- a/test/CodeGen/AMDGPU/scratch-buffer.ll
+++ b/test/CodeGen/AMDGPU/scratch-buffer.ll
@@ -1,5 +1,7 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck --check-prefix=GCN --check-prefix=DEFAULT-SCRATCH %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GCN --check-prefix=DEFAULT-SCRATCH %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+huge-scratch-buffer -mcpu=SI < %s | FileCheck --check-prefix=GCN --check-prefix=HUGE-SCRATCH %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+huge-scratch-buffer -mcpu=tonga < %s | FileCheck --check-prefix=GCN --check-prefix=HUGE-SCRATCH %s
; When a frame index offset is more than 12-bits, make sure we don't store
; it in mubuf's offset field.
@@ -8,11 +10,11 @@
; for both stores. This register is allocated by the register scavenger, so we
; should be able to reuse the same register for each scratch buffer access.
-; CHECK-LABEL: {{^}}legal_offset_fi:
-; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+; GCN-LABEL: {{^}}legal_offset_fi:
+; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; GCN: v_mov_b32_e32 [[OFFSET]], 0x8000
+; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
entry:
@@ -47,10 +49,10 @@ done:
}
-; CHECK-LABEL: {{^}}legal_offset_fi_offset
-; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+; GCN-LABEL: {{^}}legal_offset_fi_offset
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
entry:
@@ -85,3 +87,30 @@ done:
ret void
}
+; GCN-LABEL: @neg_vaddr_offset
+; We can't prove %offset is positive, so we must do the computation with the
+; immediate in an add instruction instead of folding offset and the immediate into
+; the store instruction.
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
+define void @neg_vaddr_offset(i32 %offset) {
+entry:
+ %array = alloca [8192 x i32]
+ %ptr_offset = add i32 %offset, 4
+ %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %ptr_offset
+ store i32 0, i32* %ptr
+ ret void
+}
+
+; GCN-LABEL: @pos_vaddr_offset
+; DEFAULT-SCRATCH: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16
+; HUGE-SCRATCH: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
+define void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
+entry:
+ %array = alloca [8192 x i32]
+ %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 4
+ store i32 0, i32* %ptr
+ %load_ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %offset
+ %val = load i32, i32* %load_ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index b0c18ca5959c..0598208e1317 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll
@@ -43,13 +43,7 @@ entry:
; GCN-LABEL: {{^}}smrd3:
; FIXME: There are too many copies here because we don't fold immediates
; through REG_SEQUENCE
-; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
-; SI: s_mov_b32 s[[SHI:[0-9]+]], 4
-; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
-; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; FIXME: We should be able to use s_load_dword here
-; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
+; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; GCN: s_endpgm
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 56cdcaedf900..5411618ed86d 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -112,10 +112,10 @@ entry:
}
; CHECK-LABEL: strd_spill_ldrd_reload:
-; A8: strd r1, r0, [sp, #-8]!
-; M3: strd r1, r0, [sp, #-8]!
-; BASIC: strd r1, r0, [sp, #-8]!
-; GREEDY: strd r0, r1, [sp, #-8]!
+; A8: strd r1, r0, [sp]
+; M3: strd r1, r0, [sp]
+; BASIC: strd r1, r0, [sp]
+; GREEDY: strd r0, r1, [sp]
; CHECK: @ InlineAsm Start
; CHECK: @ InlineAsm End
; A8: ldrd r2, r1, [sp]
@@ -131,53 +131,5 @@ define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) {
ret void
}
-declare void @extfunc2(i32*, i32, i32)
-
-; CHECK-LABEL: ldrd_postupdate_dec:
-; CHECK: ldrd r1, r2, [r0], #-8
-; CHECK-NEXT: bl{{x?}} _extfunc
-define void @ldrd_postupdate_dec(i32* %p0) {
- %p0.1 = getelementptr i32, i32* %p0, i32 1
- %v0 = load i32, i32* %p0
- %v1 = load i32, i32* %p0.1
- %p1 = getelementptr i32, i32* %p0, i32 -2
- call void @extfunc2(i32* %p1, i32 %v0, i32 %v1)
- ret void
-}
-
-; CHECK-LABEL: ldrd_postupdate_inc:
-; CHECK: ldrd r1, r2, [r0], #8
-; CHECK-NEXT: bl{{x?}} _extfunc
-define void @ldrd_postupdate_inc(i32* %p0) {
- %p0.1 = getelementptr i32, i32* %p0, i32 1
- %v0 = load i32, i32* %p0
- %v1 = load i32, i32* %p0.1
- %p1 = getelementptr i32, i32* %p0, i32 2
- call void @extfunc2(i32* %p1, i32 %v0, i32 %v1)
- ret void
-}
-
-; CHECK-LABEL: strd_postupdate_dec:
-; CHECK: strd r1, r2, [r0], #-8
-; CHECK-NEXT: bx lr
-define i32* @strd_postupdate_dec(i32* %p0, i32 %v0, i32 %v1) {
- %p0.1 = getelementptr i32, i32* %p0, i32 1
- store i32 %v0, i32* %p0
- store i32 %v1, i32* %p0.1
- %p1 = getelementptr i32, i32* %p0, i32 -2
- ret i32* %p1
-}
-
-; CHECK-LABEL: strd_postupdate_inc:
-; CHECK: strd r1, r2, [r0], #8
-; CHECK-NEXT: bx lr
-define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) {
- %p0.1 = getelementptr i32, i32* %p0, i32 1
- store i32 %v0, i32* %p0
- store i32 %v1, i32* %p0.1
- %p1 = getelementptr i32, i32* %p0, i32 2
- ret i32* %p1
-}
-
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/CodeGen/Mips/Fast-ISel/br1.ll b/test/CodeGen/Mips/Fast-ISel/br1.ll
index 11842ddc4188..a448e90187cb 100644
--- a/test/CodeGen/Mips/Fast-ISel/br1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/br1.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@b = global i32 1, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/bswap1.ll b/test/CodeGen/Mips/Fast-ISel/bswap1.ll
index 8ac9753fa463..8f1f703ea078 100644
--- a/test/CodeGen/Mips/Fast-ISel/bswap1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/bswap1.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN: -fast-isel-abort=1 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=32R1
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN: -fast-isel-abort=1 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=32R2
@a = global i16 -21829, align 2
diff --git a/test/CodeGen/Mips/Fast-ISel/callabi.ll b/test/CodeGen/Mips/Fast-ISel/callabi.ll
index 8f5d68b41f66..34616a50b1a0 100644
--- a/test/CodeGen/Mips/Fast-ISel/callabi.ll
+++ b/test/CodeGen/Mips/Fast-ISel/callabi.ll
@@ -1,8 +1,8 @@
; RUN: llc -march=mipsel -mcpu=mips32 -O0 \
-; RUN: -mips-fast-isel -relocation-model=pic -fast-isel-abort=1 < %s | \
+; RUN: -relocation-model=pic -fast-isel-abort=1 < %s | \
; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32R1
; RUN: llc -march=mipsel -mcpu=mips32r2 -O0 \
-; RUN: -mips-fast-isel -relocation-model=pic -fast-isel-abort=1 < %s | \
+; RUN: -relocation-model=pic -fast-isel-abort=1 < %s | \
; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32R2
declare void @xb(i8)
diff --git a/test/CodeGen/Mips/Fast-ISel/constexpr-address.ll b/test/CodeGen/Mips/Fast-ISel/constexpr-address.ll
index df60d8071836..d6d9074c7c19 100644
--- a/test/CodeGen/Mips/Fast-ISel/constexpr-address.ll
+++ b/test/CodeGen/Mips/Fast-ISel/constexpr-address.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 < %s | FileCheck %s
+; RUN: -fast-isel=true -fast-isel-abort=1 < %s | FileCheck %s
; RUN: llc -march=mipsel -mcpu=mips32r2 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 < %s | FileCheck %s
+; RUN: -fast-isel=true -fast-isel-abort=1 < %s | FileCheck %s
@ARR = external global [10 x i32], align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/div1.ll b/test/CodeGen/Mips/Fast-ISel/div1.ll
index 89e7f211251f..89055aa12805 100644
--- a/test/CodeGen/Mips/Fast-ISel/div1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/div1.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+; RUN: -fast-isel-abort=1 | FileCheck %s
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+; RUN: -fast-isel-abort=1 | FileCheck %s
@sj = global i32 200000, align 4
@sk = global i32 -47, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/fastalloca.ll b/test/CodeGen/Mips/Fast-ISel/fastalloca.ll
index b4a9f1ce7ab0..00bc7f485e08 100644
--- a/test/CodeGen/Mips/Fast-ISel/fastalloca.ll
+++ b/test/CodeGen/Mips/Fast-ISel/fastalloca.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
%struct.x = type { i32 }
diff --git a/test/CodeGen/Mips/Fast-ISel/fastcc-miss.ll b/test/CodeGen/Mips/Fast-ISel/fastcc-miss.ll
new file mode 100644
index 000000000000..d9ce8b3964a4
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/fastcc-miss.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose 2>&1 | FileCheck %s
+
+; CHECK: FastISel missed call:
+; CHECK-SAME: %call = call fastcc i32 @foo(i32 signext %a, i32 signext %b)
+
+define internal i32 @bar(i32 signext %a, i32 signext %b) {
+ %s = and i32 %a, %b
+ ret i32 %s
+}
+
+define i32 @foo(i32 signext %a, i32 signext %b) {
+ %call = call fastcc i32 @foo(i32 signext %a, i32 signext %b)
+ ret i32 %call
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll b/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
index 72de888b26e0..e346acfeff13 100644
--- a/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
+++ b/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@f1 = common global float 0.000000e+00, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/fpext.ll b/test/CodeGen/Mips/Fast-ISel/fpext.ll
index 5ac22490ff02..f78289f40a02 100644
--- a/test/CodeGen/Mips/Fast-ISel/fpext.ll
+++ b/test/CodeGen/Mips/Fast-ISel/fpext.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@f = global float 0x40147E6B80000000, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/fpintconv.ll b/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
index a94ef5081539..2c022be5b3f7 100644
--- a/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
+++ b/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
diff --git a/test/CodeGen/Mips/Fast-ISel/fptrunc.ll b/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
index 2eec4c3ef547..89a7bfce5b05 100644
--- a/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
+++ b/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@d = global double 0x40147E6B74DF0446, align 8
diff --git a/test/CodeGen/Mips/Fast-ISel/icmpa.ll b/test/CodeGen/Mips/Fast-ISel/icmpa.ll
index 670a8d5cfb4e..fc37e118e755 100644
--- a/test/CodeGen/Mips/Fast-ISel/icmpa.ll
+++ b/test/CodeGen/Mips/Fast-ISel/icmpa.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@c = global i32 4, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstore2.ll b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
index 3daf03d681cb..46f7a42a5fef 100644
--- a/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
+++ b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
@@ -4,9 +4,9 @@ target triple = "mips--linux-gnu"
@c2 = common global i8 0, align 1
@c1 = common global i8 0, align 1
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@s2 = common global i16 0, align 2
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll b/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
index acba132b28e1..09b56d2c87ec 100644
--- a/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
+++ b/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s -check-prefix=mips32r2
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s -check-prefix=mips32
@b2 = global i8 0, align 1
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll b/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll
index 9f644ecd1875..1051b2800e5b 100644
--- a/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll
+++ b/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
diff --git a/test/CodeGen/Mips/Fast-ISel/logopm.ll b/test/CodeGen/Mips/Fast-ISel/logopm.ll
index 0f0c3bf9e1dc..fec85092fffd 100644
--- a/test/CodeGen/Mips/Fast-ISel/logopm.ll
+++ b/test/CodeGen/Mips/Fast-ISel/logopm.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 < %s | FileCheck %s
@ub1 = common global i8 0, align 1
@ub2 = common global i8 0, align 1
@@ -283,8 +283,8 @@ entry:
; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
-; CHECK-DAG: addiu $[[CONST_Neg89:[0-9]+]], $zero, -89
-; CHECK-DAG: and $[[RES:[0-9]+]], $[[UC1]], $[[CONST_Neg89]]
+; CHECK-DAG: addiu $[[CONST_167:[0-9]+]], $zero, 167
+; CHECK-DAG: and $[[RES:[0-9]+]], $[[UC1]], $[[CONST_167]]
; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
; CHECK: .end andUc1
ret void
@@ -345,8 +345,8 @@ entry:
; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
-; CHECK-DAG: addiu $[[CONST_neg18:[0-9]+]], $zero, -18
-; CHECK-DAG: or $[[RES:[0-9]+]], $[[UC1]], $[[CONST_neg18]]
+; CHECK-DAG: addiu $[[CONST_238:[0-9]+]], $zero, 238
+; CHECK-DAG: or $[[RES:[0-9]+]], $[[UC1]], $[[CONST_238]]
; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
; CHECK: .end orUc1
ret void
@@ -469,8 +469,8 @@ entry:
; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
-; CHECK-DAG: addiu $[[CONST_Neg4185:[0-9]+]], $zero, -4185
-; CHECK-DAG: and $[[RES:[0-9]+]], $[[US1]], $[[CONST_Neg4185]]
+; CHECK-DAG: ori $[[CONST_61351:[0-9]+]], $zero, 61351
+; CHECK-DAG: and $[[RES:[0-9]+]], $[[US1]], $[[CONST_61351]]
; CHECK: sh $[[RES]], 0($[[US_ADDR]])
; CHECK: .end andUs1
ret void
@@ -520,8 +520,8 @@ entry:
; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
-; CHECK-DAG: addiu $[[CONST_neg4591:[0-9]+]], $zero, -4591
-; CHECK-DAG: or $[[RES:[0-9]+]], $[[US1]], $[[CONST_neg4591]]
+; CHECK-DAG: ori $[[CONST_60945:[0-9]+]], $zero, 60945
+; CHECK-DAG: or $[[RES:[0-9]+]], $[[US1]], $[[CONST_60945]]
; CHECK: sh $[[RES]], 0($[[US_ADDR]])
; CHECK: .end orUs1
ret void
@@ -583,8 +583,8 @@ entry:
; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
-; CHECK-DAG: addiu $[[CONST_Neg5512:[0-9]+]], $zero, -5512
-; CHECK-DAG: xor $[[RES:[0-9]+]], $[[US1]], $[[CONST_Neg5512]]
+; CHECK-DAG: ori $[[CONST_60024:[0-9]+]], $zero, 60024
+; CHECK-DAG: xor $[[RES:[0-9]+]], $[[US1]], $[[CONST_60024]]
; CHECK: sh $[[RES]], 0($[[US_ADDR]])
; CHECK: .end xorUs1
ret void
diff --git a/test/CodeGen/Mips/Fast-ISel/memtest1.ll b/test/CodeGen/Mips/Fast-ISel/memtest1.ll
index a3fc4a32981c..b98200d7456d 100644
--- a/test/CodeGen/Mips/Fast-ISel/memtest1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/memtest1.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN: -fast-isel-abort=1 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=32R1
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN: -fast-isel-abort=1 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=32R2
@str = private unnamed_addr constant [12 x i8] c"hello there\00", align 1
diff --git a/test/CodeGen/Mips/Fast-ISel/mul1.ll b/test/CodeGen/Mips/Fast-ISel/mul1.ll
index 0ee044bea0a7..8713e7ef1d96 100644
--- a/test/CodeGen/Mips/Fast-ISel/mul1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/mul1.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 \
-; RUN: -fast-isel -mips-fast-isel -relocation-model=pic
-; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 \
-; RUN: -fast-isel -mips-fast-isel -relocation-model=pic
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic
; The test is just to make sure it is able to allocate
; registers for this example. There was an issue with allocating AC0
diff --git a/test/CodeGen/Mips/Fast-ISel/nullvoid.ll b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
index 5fa3f13ace4c..106015e30c35 100644
--- a/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
+++ b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
; Function Attrs: nounwind
diff --git a/test/CodeGen/Mips/Fast-ISel/overflt.ll b/test/CodeGen/Mips/Fast-ISel/overflt.ll
index 57f991e23d95..37e87b29c58e 100644
--- a/test/CodeGen/Mips/Fast-ISel/overflt.ll
+++ b/test/CodeGen/Mips/Fast-ISel/overflt.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@x = common global [128000 x float] zeroinitializer, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/rem1.ll b/test/CodeGen/Mips/Fast-ISel/rem1.ll
index 9b5e440d0eaa..cf709e7e4954 100644
--- a/test/CodeGen/Mips/Fast-ISel/rem1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/rem1.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+; RUN: -fast-isel-abort=1 | FileCheck %s
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
-; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+; RUN: -fast-isel-abort=1 | FileCheck %s
@sj = global i32 200, align 4
@sk = global i32 -47, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/retabi.ll b/test/CodeGen/Mips/Fast-ISel/retabi.ll
index 03119b827eb6..20747c4ed206 100644
--- a/test/CodeGen/Mips/Fast-ISel/retabi.ll
+++ b/test/CodeGen/Mips/Fast-ISel/retabi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
@i = global i32 75, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/sel1.ll b/test/CodeGen/Mips/Fast-ISel/sel1.ll
index 47b6a895cde8..8f762b0ed088 100644
--- a/test/CodeGen/Mips/Fast-ISel/sel1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/sel1.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O2 -relocation-model=pic \
-; RUN: -fast-isel -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+; RUN: -fast-isel -fast-isel-abort=1 | FileCheck %s
define i1 @sel_i1(i1 %j, i1 %k, i1 %l) {
entry:
@@ -8,7 +8,8 @@ entry:
; FIXME: The following instruction is redundant.
; CHECK: xor $[[T0:[0-9]+]], $4, $zero
; CHECK-NEXT: sltu $[[T1:[0-9]+]], $zero, $[[T0]]
- ; CHECK-NEXT: movn $6, $5, $[[T1]]
+ ; CHECK-NEXT: andi $[[T2:[0-9]+]], $[[T1]], 1
+ ; CHECK-NEXT: movn $6, $5, $[[T2]]
; CHECK: move $2, $6
%cond = icmp ne i1 %j, 0
%res = select i1 %cond, i1 %k, i1 %l
@@ -24,7 +25,8 @@ entry:
; CHECK-DAG: seb $[[T1:[0-9]+]], $zero
; CHECK: xor $[[T2:[0-9]+]], $[[T0]], $[[T1]]
; CHECK-NEXT: sltu $[[T3:[0-9]+]], $zero, $[[T2]]
- ; CHECK-NEXT: movn $6, $5, $[[T3]]
+ ; CHECK-NEXT: andi $[[T4:[0-9]+]], $[[T3]], 1
+ ; CHECK-NEXT: movn $6, $5, $[[T4]]
; CHECK: move $2, $6
%cond = icmp ne i8 %j, 0
%res = select i1 %cond, i8 %k, i8 %l
@@ -40,7 +42,8 @@ entry:
; CHECK-DAG: seh $[[T1:[0-9]+]], $zero
; CHECK: xor $[[T2:[0-9]+]], $[[T0]], $[[T1]]
; CHECK-NEXT: sltu $[[T3:[0-9]+]], $zero, $[[T2]]
- ; CHECK-NEXT: movn $6, $5, $[[T3]]
+ ; CHECK-NEXT: andi $[[T4:[0-9]+]], $[[T3]], 1
+ ; CHECK-NEXT: movn $6, $5, $[[T4]]
; CHECK: move $2, $6
%cond = icmp ne i16 %j, 0
%res = select i1 %cond, i16 %k, i16 %l
@@ -54,7 +57,8 @@ entry:
; FIXME: The following instruction is redundant.
; CHECK: xor $[[T0:[0-9]+]], $4, $zero
; CHECK-NEXT: sltu $[[T1:[0-9]+]], $zero, $[[T0]]
- ; CHECK-NEXT: movn $6, $5, $[[T1]]
+ ; CHECK-NEXT: andi $[[T2:[0-9]+]], $[[T1]], 1
+ ; CHECK-NEXT: movn $6, $5, $[[T2]]
; CHECK: move $2, $6
%cond = icmp ne i32 %j, 0
%res = select i1 %cond, i32 %k, i32 %l
@@ -69,7 +73,8 @@ entry:
; CHECK-DAG: mtc1 $5, $f1
; CHECK-DAG: xor $[[T0:[0-9]+]], $4, $zero
; CHECK: sltu $[[T1:[0-9]+]], $zero, $[[T0]]
- ; CHECK: movn.s $f0, $f1, $[[T1]]
+ ; CHECK-NEXT: andi $[[T2:[0-9]+]], $[[T1]], 1
+ ; CHECK: movn.s $f0, $f1, $[[T2]]
%cond = icmp ne i32 %j, 0
%res = select i1 %cond, float %k, float %l
ret float %res
@@ -84,7 +89,8 @@ entry:
; CHECK-DAG: ldc1 $f0, 16($sp)
; CHECK-DAG: xor $[[T0:[0-9]+]], $4, $zero
; CHECK: sltu $[[T1:[0-9]+]], $zero, $[[T0]]
- ; CHECK: movn.d $f0, $f2, $[[T1]]
+ ; CHECK-NEXT: andi $[[T2:[0-9]+]], $[[T1]], 1
+ ; CHECK: movn.d $f0, $f2, $[[T2]]
%cond = icmp ne i32 %j, 0
%res = select i1 %cond, double %k, double %l
ret double %res
diff --git a/test/CodeGen/Mips/Fast-ISel/shftopm.ll b/test/CodeGen/Mips/Fast-ISel/shftopm.ll
index 90ddd190be13..bbea9c5566c5 100644
--- a/test/CodeGen/Mips/Fast-ISel/shftopm.ll
+++ b/test/CodeGen/Mips/Fast-ISel/shftopm.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 \
; RUN: -fast-isel-abort=1 -mcpu=mips32r2 < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 \
; RUN: -fast-isel-abort=1 -mcpu=mips32 < %s | FileCheck %s
@s1 = global i16 -89, align 2
diff --git a/test/CodeGen/Mips/Fast-ISel/shift.ll b/test/CodeGen/Mips/Fast-ISel/shift.ll
index df1c82700d59..9fe694bb5827 100644
--- a/test/CodeGen/Mips/Fast-ISel/shift.ll
+++ b/test/CodeGen/Mips/Fast-ISel/shift.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips32r2 -O1 -fast-isel=true -mips-fast-isel -filetype=obj %s -o - \
+; RUN: llc -march=mipsel -mcpu=mips32r2 -O0 -fast-isel=true -filetype=obj %s -o - \
; RUN: | llvm-objdump -arch mipsel -mcpu=mips32r2 -d - | FileCheck %s
; This test checks that encoding for srl is correct when fast-isel for mips32r2 is used.
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestore.ll b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
index bcb198b1a823..627a383f597c 100644
--- a/test/CodeGen/Mips/Fast-ISel/simplestore.ll
+++ b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@abcd = external global i32
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
index f4b91d850255..62101d8ef7eb 100644
--- a/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s -check-prefix=mips32r2
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s -check-prefix=mips32
@f = common global float 0.000000e+00, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorei.ll b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
index 83ccae0b1de5..67541b54bae7 100644
--- a/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@ijk = external global i32
@@ -22,9 +22,10 @@ define void @si2_2() #0 {
entry:
store i32 -32768, i32* @ijk, align 4
; CHECK: .ent si2_2
-; CHECK: addiu $[[REG1:[0-9]+]], $zero, -32768
-; CHECK: lw $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
-; CHECK: sw $[[REG1]], 0($[[REG2]])
+; CHECK: lui $[[REG1:[0-9]+]], 65535
+; CHECK: ori $[[REG2:[0-9]+]], $[[REG1]], 32768
+; CHECK: lw $[[REG3:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK: sw $[[REG2]], 0($[[REG3]])
ret void
}
diff --git a/test/CodeGen/Mips/delay-slot-kill.ll b/test/CodeGen/Mips/delay-slot-kill.ll
index 57b630303c26..5e301441fd26 100644
--- a/test/CodeGen/Mips/delay-slot-kill.ll
+++ b/test/CodeGen/Mips/delay-slot-kill.ll
@@ -1,4 +1,6 @@
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s
+; We have to XFAIL this temporarily because of the reversion of r229675.
+; XFAIL: *
; Currently, the following IR assembly generates a KILL instruction between
; the bitwise-and instruction and the return instruction. We verify that the
diff --git a/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll b/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
index 779620e10128..58dd16c9f9c8 100644
--- a/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
+++ b/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
@@ -1,10 +1,10 @@
; Check that register scavenging spill slot is close to $fp.
-; RUN: llc -march=mipsel -O0 < %s | FileCheck %s
+; RUN: llc -march=mipsel -O0 -fast-isel=false < %s | FileCheck %s
-; CHECK: sw ${{.*}}, 4($sp)
-; CHECK: lw ${{.*}}, 4($sp)
+; CHECK: sw ${{.*}}, 8($sp)
+; CHECK: lw ${{.*}}, 8($sp)
-define i32 @main(i32 signext %argc, i8** %argv) "no-frame-pointer-elim"="true" {
+define i32 @main(i32 signext %argc, i8** %argv) #0 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
@@ -30,3 +30,5 @@ entry:
store <16 x i8> %mul, <16 x i8>* %result, align 16
ret i32 0
}
+
+attributes #0 = { noinline optnone "no-frame-pointer-elim"="true" }
diff --git a/test/CodeGen/Mips/llvm-ir/and.ll b/test/CodeGen/Mips/llvm-ir/and.ll
index 8ebcfe4a3f64..c4121701ec15 100644
--- a/test/CodeGen/Mips/llvm-ir/and.ll
+++ b/test/CodeGen/Mips/llvm-ir/and.ll
@@ -59,7 +59,10 @@ define signext i32 @and_i32(i32 signext %a, i32 signext %b) {
entry:
; ALL-LABEL: and_i32:
- ; ALL: and $2, $4, $5
+ ; GP32: and $2, $4, $5
+
+ ; GP64: and $[[T0:[0-9]+]], $4, $5
+ ; GP64: sll $2, $[[T0]], 0
%r = and i32 %a, %b
ret i32 %r
diff --git a/test/CodeGen/Mips/llvm-ir/or.ll b/test/CodeGen/Mips/llvm-ir/or.ll
index 6215e4036325..8509d6ce93f3 100644
--- a/test/CodeGen/Mips/llvm-ir/or.ll
+++ b/test/CodeGen/Mips/llvm-ir/or.ll
@@ -59,7 +59,11 @@ define signext i32 @or_i32(i32 signext %a, i32 signext %b) {
entry:
; ALL-LABEL: or_i32:
- ; ALL: or $2, $4, $5
+ ; GP32: or $2, $4, $5
+
+ ; GP64: or $[[T0:[0-9]+]], $4, $5
+ ; FIXME: The sll instruction below is redundant.
+ ; GP64: sll $2, $[[T0]], 0
%r = or i32 %a, %b
ret i32 %r
diff --git a/test/CodeGen/Mips/llvm-ir/xor.ll b/test/CodeGen/Mips/llvm-ir/xor.ll
index 89af99981a3c..d3cc57484895 100644
--- a/test/CodeGen/Mips/llvm-ir/xor.ll
+++ b/test/CodeGen/Mips/llvm-ir/xor.ll
@@ -59,7 +59,10 @@ define signext i32 @xor_i32(i32 signext %a, i32 signext %b) {
entry:
; ALL-LABEL: xor_i32:
- ; ALL: xor $2, $4, $5
+ ; GP32: xor $2, $4, $5
+
+ ; GP64: xor $[[T0:[0-9]+]], $4, $5
+ ; GP64: sll $2, $[[T0]], 0
%r = xor i32 %a, %b
ret i32 %r
diff --git a/test/CodeGen/PowerPC/fp2int2fp-ppcfp128.ll b/test/CodeGen/PowerPC/fp2int2fp-ppcfp128.ll
new file mode 100644
index 000000000000..7742ffe33150
--- /dev/null
+++ b/test/CodeGen/PowerPC/fp2int2fp-ppcfp128.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mcpu=a2 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define linkonce_odr double @test1() {
+entry:
+ %conv6.i.i = fptosi ppc_fp128 undef to i64
+ %conv.i = sitofp i64 %conv6.i.i to double
+ ret double %conv.i
+
+; CHECK-LABEL: @test1
+; CHECK: bl __fixtfdi
+; CHECK: fcfid
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/ppc64-patchpoint.ll b/test/CodeGen/PowerPC/ppc64-patchpoint.ll
index 53b737ae9a0b..d10ea98cd1a7 100644
--- a/test/CodeGen/PowerPC/ppc64-patchpoint.ll
+++ b/test/CodeGen/PowerPC/ppc64-patchpoint.ll
@@ -103,6 +103,21 @@ entry:
ret void
}
+; Trivial symbolic patchpoint codegen.
+
+declare i64 @foo(i64 %p1, i64 %p2)
+define i64 @trivial_symbolic_patchpoint_codegen(i64 %p1, i64 %p2) {
+entry:
+; CHECK-LABEL: trivial_symbolic_patchpoint_codegen:
+; CHECK: bl foo
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NOT: nop
+; CHECK: blr
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 9, i32 12, i8* bitcast (i64 (i64, i64)* @foo to i8*), i32 2, i64 %p1, i64 %p2)
+ ret i64 %result
+}
+
declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/test/CodeGen/PowerPC/pr24216.ll b/test/CodeGen/PowerPC/pr24216.ll
new file mode 100644
index 000000000000..4ab41985f5b1
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr24216.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Test case adapted from PR24216.
+
+define void @foo(<16 x i8>* nocapture readonly %in, <16 x i8>* nocapture %out) {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* %in, align 16
+ %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5>
+ store <16 x i8> %1, <16 x i8>* %out, align 16
+ ret void
+}
+
+; CHECK: vperm
+; CHECK-NOT: vspltw
diff --git a/test/CodeGen/PowerPC/vec_shuffle_le.ll b/test/CodeGen/PowerPC/vec_shuffle_le.ll
index 46d451ff1573..65c47ada8750 100644
--- a/test/CodeGen/PowerPC/vec_shuffle_le.ll
+++ b/test/CodeGen/PowerPC/vec_shuffle_le.ll
@@ -202,7 +202,7 @@ entry:
; CHECK: VSLDOI_xx:
%tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK: vsldoi
+; CHECK: vsldoi {{[0-9]+}}, [[REG1:[0-9]+]], [[REG1]], 4
store <16 x i8> %tmp2, <16 x i8>* %A
ret void
}
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
index f85acebeea67..dceb2516c696 100644
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -70,10 +70,10 @@ entry:
; CHECK-REG: blr
; CHECK-FISL-LABEL: @test5
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlxor 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: vor
+; CHECK-FISL: vor
+; CHECK-FISL: xxlxor
+; CHECK-FISL: vor 2
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test5
@@ -133,10 +133,10 @@ entry:
; CHECK-REG: blr
; CHECK-FISL-LABEL: @test8
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlor 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: vor
+; CHECK-FISL: vor
+; CHECK-FISL: xxlor
+; CHECK-FISL: vor 2
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test8
@@ -196,10 +196,10 @@ entry:
; CHECK-REG: blr
; CHECK-FISL-LABEL: @test11
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxland 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: vor
+; CHECK-FISL: vor
+; CHECK-FISL: xxland
+; CHECK-FISL: vor 2
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test11
@@ -260,17 +260,14 @@ entry:
; CHECK-REG: blr
; CHECK-FISL-LABEL: @test14
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlor 36, 36, 37
-; CHECK-FISL: vor 0, 4, 4
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlnor 36, 36, 37
+; CHECK-FISL: vor 4, 3, 3
+; CHECK-FISL: vor 5, 2, 2
+; CHECK-FISL: xxlor 0, 37, 36
+; CHECK-FISL: xxlnor 36, 37, 36
; CHECK-FISL: vor 2, 4, 4
; CHECK-FISL: lis 0, -1
; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stvx 0, 1, 0
+; CHECK-FISL: stxvd2x 0, 1, 0
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test14
@@ -347,15 +344,13 @@ entry:
; CHECK-REG: blr
; CHECK-FISL-LABEL: @test17
-; CHECK-FISL: vspltisb 4, -1
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: vor 0, 4, 4
-; CHECK-FISL: xxlxor 37, 37, 32
-; CHECK-FISL: vor 3, 5, 5
+; CHECK-FISL: vor 4, 3, 3
; CHECK-FISL: vor 5, 2, 2
-; CHECK-FISL: vor 0, 3, 3
-; CHECK-FISL: xxland 37, 37, 32
-; CHECK-FISL: vor 2, 5, 5
+; CHECK-FISL: vspltisb 2, -1
+; CHECK-FISL: vor 0, 2, 2
+; CHECK-FISL: xxlxor 36, 36, 32
+; CHECK-FISL: xxland 36, 37, 36
+; CHECK-FISL: vor 2, 4, 4
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test17
@@ -434,12 +429,18 @@ entry:
; CHECK-REG: xxsel 34, 35, 34, {{[0-9]+}}
; CHECK-REG: blr
+; FIXME: The fast-isel code is pretty miserable for this one.
+
; CHECK-FISL-LABEL: @test20
-; CHECK-FISL: vcmpequw 4, 4, 5
-; CHECK-FISL: vor 0, 3, 3
-; CHECK-FISL: vor 1, 2, 2
-; CHECK-FISL: vor 6, 4, 4
-; CHECK-FISL: xxsel 32, 32, 33, 38
+; CHECK-FISL: vor 0, 5, 5
+; CHECK-FISL: vor 1, 4, 4
+; CHECK-FISL: vor 6, 3, 3
+; CHECK-FISL: vor 7, 2, 2
+; CHECK-FISL: vor 2, 1, 1
+; CHECK-FISL: vor 3, 0, 0
+; CHECK-FISL: vcmpequw 2, 2, 3
+; CHECK-FISL: vor 0, 2, 2
+; CHECK-FISL: xxsel 32, 38, 39, 32
; CHECK-FISL: vor 2, 0, 0
; CHECK-FISL: blr
@@ -794,8 +795,6 @@ define <4 x i32> @test34(<4 x i32>* %a) {
; CHECK-FISL-LABEL: @test34
; CHECK-FISL: lxvw4x 0, 0, 3
; CHECK-FISL: xxlor 34, 0, 0
-; CHECK-FISL: vor 3, 2, 2
-; CHECK-FISL: vor 2, 3, 3
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test34
diff --git a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 84bbdd75b0f7..6c89b1092bdf 100644
--- a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -8,9 +8,9 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
; CHECK-LABEL: testi0
; CHECK: lxvd2x 0, 0, 3
-; CHECK: lxsdx 34, 0, 4
+; CHECK: lxsdx 1, 0, 4
; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 1, 34, 0
+; CHECK: xxspltd 1, 1, 0
; CHECK: xxpermdi 34, 0, 1, 1
}
@@ -22,9 +22,9 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
; CHECK-LABEL: testi1
; CHECK: lxvd2x 0, 0, 3
-; CHECK: lxsdx 34, 0, 4
+; CHECK: lxsdx 1, 0, 4
; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 1, 34, 0
+; CHECK: xxspltd 1, 1, 0
; CHECK: xxmrgld 34, 1, 0
}
diff --git a/test/CodeGen/PowerPC/xvcmpeqdp-v2f64.ll b/test/CodeGen/PowerPC/xvcmpeqdp-v2f64.ll
new file mode 100644
index 000000000000..ef63233e746b
--- /dev/null
+++ b/test/CodeGen/PowerPC/xvcmpeqdp-v2f64.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @__fmax_double3_3D_exec() #0 {
+entry:
+ br i1 undef, label %if.then.i, label %fmax_double3.exit
+
+if.then.i: ; preds = %entry
+ %cmp24.i.i = fcmp ord <3 x double> undef, zeroinitializer
+ %sext25.i.i = sext <3 x i1> %cmp24.i.i to <3 x i64>
+ %neg.i.i = xor <3 x i64> %sext25.i.i, <i64 -1, i64 -1, i64 -1>
+ %or.i.i = or <3 x i64> undef, %neg.i.i
+ %neg.i.i.i = select <3 x i1> undef, <3 x i64> zeroinitializer, <3 x i64> %sext25.i.i
+ %and.i.i.i = and <3 x i64> undef, %neg.i.i.i
+ %and26.i.i.i = and <3 x i64> undef, %or.i.i
+ %or.i.i.i = or <3 x i64> %and.i.i.i, %and26.i.i.i
+ %astype32.i.i.i = bitcast <3 x i64> %or.i.i.i to <3 x double>
+ %extractVec33.i.i.i = shufflevector <3 x double> %astype32.i.i.i, <3 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ store <4 x double> %extractVec33.i.i.i, <4 x double>* undef, align 32
+ br label %fmax_double3.exit
+
+; CHECK-LABEL: @__fmax_double3_3D_exec
+; CHECK: xvcmpeqdp
+
+fmax_double3.exit: ; preds = %if.then.i, %entry
+ br i1 undef, label %if.then, label %do.end
+
+if.then: ; preds = %fmax_double3.exit
+ unreachable
+
+do.end: ; preds = %fmax_double3.exit
+ ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/SystemZ/args-04.ll b/test/CodeGen/SystemZ/args-04.ll
index 1178bb4dafdf..48a2cf491049 100644
--- a/test/CodeGen/SystemZ/args-04.ll
+++ b/test/CodeGen/SystemZ/args-04.ll
@@ -124,3 +124,17 @@ define void @f13(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
store fp128 %y, fp128 *%r2
ret void
}
+
+; Explicit fp128 return values are likewise passed indirectly.
+define fp128 @f14(fp128 %r3) {
+; CHECK-LABEL: f14:
+; CHECK: ld %f0, 0(%r3)
+; CHECK: ld %f2, 8(%r3)
+; CHECK: axbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+ %y = fadd fp128 %r3, %r3
+ ret fp128 %y
+}
+
diff --git a/test/CodeGen/SystemZ/args-07.ll b/test/CodeGen/SystemZ/args-07.ll
new file mode 100644
index 000000000000..29d9b319ffc0
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-07.ll
@@ -0,0 +1,60 @@
+; Test multiple return values (LLVM ABI extension)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Up to four integer return values fit into GPRs.
+define { i64, i64, i64, i64 } @f1() {
+; CHECK-LABEL: f1:
+; CHECK: lghi %r2, 0
+; CHECK: lghi %r3, 1
+; CHECK: lghi %r4, 2
+; CHECK: lghi %r5, 3
+; CHECK: br %r14
+ ret { i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3 }
+}
+
+; More than four integer return values use sret.
+define { i64, i64, i64, i64, i64 } @f2() {
+; CHECK-LABEL: f2:
+; CHECK: mvghi 32(%r2), 4
+; CHECK: mvghi 24(%r2), 3
+; CHECK: mvghi 16(%r2), 2
+; CHECK: mvghi 8(%r2), 1
+; CHECK: mvghi 0(%r2), 0
+; CHECK: br %r14
+ ret { i64, i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3, i64 4 }
+}
+
+; Up to four floating-point return values fit into FPRs.
+define { double, double, double, double } @f3() {
+; CHECK-LABEL: f3:
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: ldeb %f0, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: ldeb %f2, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: ldeb %f4, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: ldeb %f6, 0([[TMP]])
+; CHECK: br %r14
+ ret { double, double, double, double }
+ { double 1.0, double 2.0, double 3.0, double 4.0 }
+}
+
+; More than four floating-point return values use sret.
+define { double, double, double, double, double } @f4() {
+; CHECK-LABEL: f4:
+; CHECK: llihh [[TMP:%r[0-5]]], 16404
+; CHECK: stg [[TMP]], 32(%r2)
+; CHECK: llihh [[TMP:%r[0-5]]], 16400
+; CHECK: stg [[TMP]], 24(%r2)
+; CHECK: llihh [[TMP:%r[0-5]]], 16392
+; CHECK: stg [[TMP]], 16(%r2)
+; CHECK: llihh [[TMP:%r[0-5]]], 16384
+; CHECK: stg [[TMP]], 8(%r2)
+; CHECK: llihh [[TMP:%r[0-5]]], 16368
+; CHECK: stg [[TMP]], 0(%r2)
+; CHECK: br %r14
+ ret { double, double, double, double, double }
+ { double 1.0, double 2.0, double 3.0, double 4.0, double 5.0 }
+}
diff --git a/test/CodeGen/SystemZ/args-08.ll b/test/CodeGen/SystemZ/args-08.ll
new file mode 100644
index 000000000000..0bad5a8989dc
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-08.ll
@@ -0,0 +1,57 @@
+; Test calling functions with multiple return values (LLVM ABI extension)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Up to four integer return values fit into GPRs.
+declare { i64, i64, i64, i64 } @bar1()
+
+define i64 @f1() {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, bar1
+; CHECK: lgr %r2, %r5
+; CHECK: br %r14
+ %mret = call { i64, i64, i64, i64 } @bar1()
+ %ret = extractvalue { i64, i64, i64, i64 } %mret, 3
+ ret i64 %ret
+}
+
+; More than four integer return values use sret.
+declare { i64, i64, i64, i64, i64 } @bar2()
+
+define i64 @f2() {
+; CHECK-LABEL: f2:
+; CHECK: la %r2, 160(%r15)
+; CHECK: brasl %r14, bar2
+; CHECK: lg %r2, 192(%r15)
+; CHECK: br %r14
+ %mret = call { i64, i64, i64, i64, i64 } @bar2()
+ %ret = extractvalue { i64, i64, i64, i64, i64 } %mret, 4
+ ret i64 %ret
+}
+
+; Up to four floating-point return values fit into FPRs.
+declare { double, double, double, double } @bar3()
+
+define double @f3() {
+; CHECK-LABEL: f3:
+; CHECK: brasl %r14, bar3
+; CHECK: ldr %f0, %f6
+; CHECK: br %r14
+ %mret = call { double, double, double, double } @bar3()
+ %ret = extractvalue { double, double, double, double } %mret, 3
+ ret double %ret
+}
+
+; More than four floating-point return values use sret.
+declare { double, double, double, double, double } @bar4()
+
+define double @f4() {
+; CHECK-LABEL: f4:
+; CHECK: la %r2, 160(%r15)
+; CHECK: brasl %r14, bar4
+; CHECK: ld %f0, 192(%r15)
+; CHECK: br %r14
+ %mret = call { double, double, double, double, double } @bar4()
+ %ret = extractvalue { double, double, double, double, double } %mret, 4
+ ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-args-06.ll b/test/CodeGen/SystemZ/vec-args-06.ll
new file mode 100644
index 000000000000..b26131ca1d4e
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-06.ll
@@ -0,0 +1,83 @@
+; Test multiple return values (LLVM ABI extension)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Up to eight vector return values fit into VRs.
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>, <2 x double>, <2 x double> } @f1() {
+; CHECK-LABEL: f1:
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v24, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v26, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v28, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v30, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v25, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v27, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v29, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v31, 0([[TMP]])
+; CHECK: br %r14
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>, <2 x double>, <2 x double> }
+ { <2 x double> <double 1.0, double 1.1>,
+ <2 x double> <double 2.0, double 2.1>,
+ <2 x double> <double 3.0, double 3.1>,
+ <2 x double> <double 4.0, double 4.1>,
+ <2 x double> <double 5.0, double 5.1>,
+ <2 x double> <double 6.0, double 6.1>,
+ <2 x double> <double 7.0, double 7.1>,
+ <2 x double> <double 8.0, double 8.1> }
+}
+
+; More than eight vector return values use sret.
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+ <2 x double> } @f2() {
+; CHECK-LABEL: f2:
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 128(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 112(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 96(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 80(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 64(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 48(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 32(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 16(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 0(%r2)
+; CHECK: br %r14
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+ <2 x double> }
+ { <2 x double> <double 1.0, double 1.1>,
+ <2 x double> <double 2.0, double 2.1>,
+ <2 x double> <double 3.0, double 3.1>,
+ <2 x double> <double 4.0, double 4.1>,
+ <2 x double> <double 5.0, double 5.1>,
+ <2 x double> <double 6.0, double 6.1>,
+ <2 x double> <double 7.0, double 7.1>,
+ <2 x double> <double 8.0, double 8.1>,
+ <2 x double> <double 9.0, double 9.1> }
+}
diff --git a/test/CodeGen/SystemZ/vec-args-07.ll b/test/CodeGen/SystemZ/vec-args-07.ll
new file mode 100644
index 000000000000..f0b5e6835cfe
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-07.ll
@@ -0,0 +1,47 @@
+; Test calling functions with multiple return values (LLVM ABI extension)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Up to eight vector return values fit into VRs.
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>, <2 x double>, <2 x double> } @bar1()
+
+define <2 x double> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, bar1
+; CHECK: vlr %v24, %v31
+; CHECK: br %r14
+ %mret = call { <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double>, <2 x double> } @bar1()
+ %ret = extractvalue { <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double>, <2 x double> } %mret, 7
+ ret <2 x double> %ret
+}
+
+; More than eight vector return values use sret.
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+ <2 x double> } @bar2()
+
+define <2 x double> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: la %r2, 160(%r15)
+; CHECK: brasl %r14, bar2
+; CHECK: vl %v24, 288(%r15)
+; CHECK: br %r14
+ %mret = call { <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double> } @bar2()
+ %ret = extractvalue { <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double>, <2 x double>,
+ <2 x double> } %mret, 8
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/X86/fdiv-combine.ll b/test/CodeGen/X86/fdiv-combine.ll
index 34eac62e3673..b65e9d01ab8b 100644
--- a/test/CodeGen/X86/fdiv-combine.ll
+++ b/test/CodeGen/X86/fdiv-combine.ll
@@ -44,5 +44,24 @@ define double @div3_arcp(double %x, double %y, double %z) #0 {
ret double %ret
}
+define void @PR24141() #0 {
+; CHECK-LABEL: PR24141:
+; CHECK: callq
+; CHECK-NEXT: divsd
+; CHECK-NEXT: jmp
+entry:
+ br label %while.body
+
+while.body:
+ %x.0 = phi double [ undef, %entry ], [ %div, %while.body ]
+ %call = call { double, double } @g(double %x.0)
+ %xv0 = extractvalue { double, double } %call, 0
+ %xv1 = extractvalue { double, double } %call, 1
+ %div = fdiv double %xv0, %xv1
+ br label %while.body
+}
+
+declare { double, double } @g(double)
+
; FIXME: If the backend understands 'arcp', then this attribute is unnecessary.
attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/machine-trace-metrics-crash.ll b/test/CodeGen/X86/machine-trace-metrics-crash.ll
new file mode 100644
index 000000000000..1d0ee79f04a9
--- /dev/null
+++ b/test/CodeGen/X86/machine-trace-metrics-crash.ll
@@ -0,0 +1,62 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s
+
+; The debug info in this test case was causing a crash because machine trace metrics
+; did not correctly ignore debug instructions. The check lines ensure that the
+; machine-combiner pass has run, reassociated the add operands, and therefore
+; used machine trace metrics.
+
+define void @PR24199() {
+; CHECK-LABEL: PR24199:
+; CHECK: addss %xmm1, %xmm0
+; CHECK: addss %xmm2, %xmm0
+
+entry:
+ %i = alloca %struct.A, align 8
+ %tobool = icmp ne i32 undef, 0
+ br i1 undef, label %if.end, label %if.then
+
+if.then:
+ br label %if.end
+
+if.end:
+ %h = phi float [ 0.0, %if.then ], [ 4.0, %entry ]
+ call void @foo(%struct.A* nonnull undef)
+ tail call void @llvm.dbg.value(metadata %struct.A* undef, i64 0, metadata !5, metadata !4), !dbg !6
+ tail call void @llvm.dbg.value(metadata float %h, i64 0, metadata !5, metadata !4), !dbg !6
+ %n0 = load float, float* undef, align 4
+ %mul = fmul fast float %n0, %h
+ %add = fadd fast float %mul, 1.0
+ tail call void @llvm.dbg.value(metadata %struct.A* undef, i64 0, metadata !5, metadata !4), !dbg !6
+ tail call void @llvm.dbg.value(metadata float %add, i64 0, metadata !5, metadata !4), !dbg !6
+ %add.i = fadd fast float %add, %n0
+ store float %add.i, float* undef, align 4
+ %n1 = bitcast %struct.A* %i to i8*
+ call void @llvm.lifetime.start(i64 16, i8* %n1)
+ %n2 = load <2 x float>, <2 x float>* undef, align 8
+ %conv = uitofp i1 %tobool to float
+ %bitcast = extractelement <2 x float> %n2, i32 0
+ %factor = fmul fast float %bitcast, 2.0
+ %add3 = fadd fast float %factor, %conv
+ call void @bar(float %add3)
+ ret void
+}
+
+%struct.A = type { float, float }
+
+declare void @bar(float)
+declare void @foo(%struct.A*)
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: 1)
+!1 = !DIFile(filename: "24199.cpp", directory: "/bin")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !DISubprogram(linkageName: "foo", file: !1, line: 18, isLocal: false, isDefinition: true, scopeLine: 18, function: void (%struct.A*)* @foo)
+!4 = !DIExpression()
+!5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, scope: !3, flags: DIFlagArtificial | DIFlagObjectPointer)
+!6 = !DILocation(line: 0, scope: !3)
+
+
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index 9a162d77ef48..095ab831d48d 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -1,15 +1,24 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; PR2656
-; CHECK: {{xorps.*sp}}
-; CHECK-NOT: {{xorps.*sp}}
-
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9.4.0"
%struct.anon = type <{ float, float }>
@.str = internal constant [17 x i8] c"pt: %.0f, %.0f\0A\00\00" ; <[17 x i8]*> [#uses=1]
+; We can not fold either stack load into an 'xor' instruction because that
+; would change what should be a 4-byte load into a 16-byte load.
+; We can fold the 16-byte constant load into either 'xor' instruction,
+; but we do not. It has more than one use, so it gets loaded into a register.
+
define void @foo(%struct.anon* byval %p) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movaps {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT: xorps %xmm2, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
+; CHECK-NEXT: xorps %xmm2, %xmm1
entry:
%tmp = getelementptr %struct.anon, %struct.anon* %p, i32 0, i32 0 ; <float*> [#uses=1]
%tmp1 = load float, float* %tmp ; <float> [#uses=1]
@@ -24,3 +33,20 @@ entry:
}
declare i32 @printf(...)
+
+; We can not fold the load from the stack into the 'and' instruction because
+; that changes an 8-byte load into a 16-byte load (illegal memory access).
+; We can fold the load of the constant because it is a 16-byte vector constant.
+
+define double @PR22371(double %x) {
+; CHECK-LABEL: PR22371:
+; CHECK: movsd 16(%esp), %xmm0
+; CHECK-NEXT: andpd LCPI1_0, %xmm0
+; CHECK-NEXT: movlpd %xmm0, (%esp)
+ %call = tail call double @fabs(double %x) #0
+ ret double %call
+}
+
+declare double @fabs(double) #0
+attributes #0 = { readnone }
+
diff --git a/test/CodeGen/X86/sse-fcopysign.ll b/test/CodeGen/X86/sse-fcopysign.ll
index 25634b5472aa..8a5462bea82d 100644
--- a/test/CodeGen/X86/sse-fcopysign.ll
+++ b/test/CodeGen/X86/sse-fcopysign.ll
@@ -55,12 +55,12 @@ declare double @copysign(double, double)
define float @int1(float %a, float %b) {
; X32-LABEL: @int1
-; X32: movss 12(%esp), %xmm0 {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: movss 8(%esp), %xmm1 {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: andps .LCPI2_0, %xmm1
-; X32-NEXT: andps .LCPI2_1, %xmm0
-; X32-NEXT: orps %xmm1, %xmm0
-; X32-NEXT: movss %xmm0, (%esp)
+; X32: movss 8(%esp), %xmm0 {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: andps .LCPI2_0, %xmm0
+; X32-NEXT: movss 12(%esp), %xmm1 {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: andps .LCPI2_1, %xmm1
+; X32-NEXT: orps %xmm0, %xmm1
+; X32-NEXT: movss %xmm1, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
; X32-NEXT: retl
@@ -76,14 +76,14 @@ define float @int1(float %a, float %b) {
define double @int2(double %a, float %b, float %c) {
; X32-LABEL: @int2
-; X32: movsd 8(%ebp), %xmm0 {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: movss 16(%ebp), %xmm1 {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: addss 20(%ebp), %xmm1
-; X32-NEXT: andpd .LCPI3_0, %xmm0
-; X32-NEXT: cvtss2sd %xmm1, %xmm1
-; X32-NEXT: andpd .LCPI3_1, %xmm1
-; X32-NEXT: orpd %xmm0, %xmm1
-; X32-NEXT: movsd %xmm1, (%esp)
+; X32: movss 16(%ebp), %xmm0 {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: addss 20(%ebp), %xmm0
+; X32-NEXT: movsd 8(%ebp), %xmm1 {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: andpd .LCPI3_0, %xmm1
+; X32-NEXT: cvtss2sd %xmm0, %xmm0
+; X32-NEXT: andpd .LCPI3_1, %xmm0
+; X32-NEXT: orpd %xmm1, %xmm0
+; X32-NEXT: movlpd %xmm0, (%esp)
; X32-NEXT: fldl (%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
@@ -91,9 +91,9 @@ define double @int2(double %a, float %b, float %c) {
;
; X64-LABEL: @int2
; X64: addss %xmm2, %xmm1
-; X64-NEXT: andpd .LCPI3_0(%rip), %xmm0
; X64-NEXT: cvtss2sd %xmm1, %xmm1
-; X64-NEXT: andpd .LCPI3_1(%rip), %xmm1
+; X64-NEXT: andpd .LCPI3_0(%rip), %xmm1
+; X64-NEXT: andpd .LCPI3_1(%rip), %xmm0
; X64-NEXT: orpd %xmm1, %xmm0
; X64-NEXT: retq
%tmp1 = fadd float %b, %c
diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll
index bfefbcf5ebd3..960b5f27cf53 100644
--- a/test/CodeGen/X86/vec_fabs.ll
+++ b/test/CodeGen/X86/vec_fabs.ll
@@ -4,7 +4,7 @@
define <2 x double> @fabs_v2f64(<2 x double> %p)
{
; CHECK-LABEL: fabs_v2f64
- ; CHECK: vandps
+ ; CHECK: vandpd
%t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
ret <2 x double> %t
}
@@ -22,7 +22,7 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
define <4 x double> @fabs_v4f64(<4 x double> %p)
{
; CHECK-LABEL: fabs_v4f64
- ; CHECK: vandps
+ ; CHECK: vandpd
%t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
ret <4 x double> %t
}
diff --git a/test/DebugInfo/Mips/delay-slot.ll b/test/DebugInfo/Mips/delay-slot.ll
index bbf749c82ab9..df01775a12e6 100644
--- a/test/DebugInfo/Mips/delay-slot.ll
+++ b/test/DebugInfo/Mips/delay-slot.ll
@@ -13,12 +13,14 @@
; CHECK: Address Line Column File ISA Discriminator Flags
; CHECK: ------------------ ------ ------ ------ --- ------------- -------------
; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt
-; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt prologue_end
-; CHECK: 0x0000000000000008 2 0 1 0 0 is_stmt
-; CHECK: 0x0000000000000020 3 0 1 0 0 is_stmt
-; CHECK: 0x0000000000000030 4 0 1 0 0 is_stmt
-; CHECK: 0x0000000000000040 5 0 1 0 0 is_stmt
-; CHECK: 0x0000000000000050 5 0 1 0 0 is_stmt end_sequence
+; FIXME: The next address probably ought to be 0x0000000000000004 but there's
+; a constant initialization before the prologue's end.
+; CHECK: 0x0000000000000008 2 0 1 0 0 is_stmt prologue_end
+; CHECK: 0x0000000000000028 3 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000038 4 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000048 5 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000058 5 0 1 0 0 is_stmt end_sequence
+
target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
target triple = "mips--linux-gnu"
diff --git a/test/MC/AMDGPU/vopc.s b/test/MC/AMDGPU/vopc.s
index f44919a4f1e0..2d8547c5f953 100644
--- a/test/MC/AMDGPU/vopc.s
+++ b/test/MC/AMDGPU/vopc.s
@@ -1,5 +1,6 @@
-// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
-// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
//===----------------------------------------------------------------------===//
// Generic Checks
@@ -7,23 +8,28 @@
// src0 sgpr
v_cmp_lt_f32 vcc, s2, v4
-// CHECK: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x02,0x7c]
+// SICI: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x02,0x7c]
+// VI: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x82,0x7c]
// src0 inline immediate
v_cmp_lt_f32 vcc, 0, v4
-// CHECK: v_cmp_lt_f32_e32 vcc, 0, v4 ; encoding: [0x80,0x08,0x02,0x7c]
+// SICI: v_cmp_lt_f32_e32 vcc, 0, v4 ; encoding: [0x80,0x08,0x02,0x7c]
+// VI: v_cmp_lt_f32_e32 vcc, 0, v4 ; encoding: [0x80,0x08,0x82,0x7c]
// src0 literal
v_cmp_lt_f32 vcc, 10.0, v4
-// CHECK: v_cmp_lt_f32_e32 vcc, 0x41200000, v4 ; encoding: [0xff,0x08,0x02,0x7c,0x00,0x00,0x20,0x41]
+// SICI: v_cmp_lt_f32_e32 vcc, 0x41200000, v4 ; encoding: [0xff,0x08,0x02,0x7c,0x00,0x00,0x20,0x41]
+// VI: v_cmp_lt_f32_e32 vcc, 0x41200000, v4 ; encoding: [0xff,0x08,0x82,0x7c,0x00,0x00,0x20,0x41]
// src0, src1 max vgpr
v_cmp_lt_f32 vcc, v255, v255
-// CHECK: v_cmp_lt_f32_e32 vcc, v255, v255 ; encoding: [0xff,0xff,0x03,0x7c]
+// SICI: v_cmp_lt_f32_e32 vcc, v255, v255 ; encoding: [0xff,0xff,0x03,0x7c]
+// VI: v_cmp_lt_f32_e32 vcc, v255, v255 ; encoding: [0xff,0xff,0x83,0x7c]
// force 32-bit encoding
v_cmp_lt_f32_e32 vcc, v2, v4
-// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
+// SICI: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
+// VI: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x82,0x7c]
//===----------------------------------------------------------------------===//
@@ -31,10 +37,12 @@ v_cmp_lt_f32_e32 vcc, v2, v4
//===----------------------------------------------------------------------===//
v_cmp_f_f32 vcc, v2, v4
-// CHECK: v_cmp_f_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x00,0x7c]
+// SICI: v_cmp_f_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x00,0x7c]
+// VI: v_cmp_f_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x80,0x7c]
v_cmp_lt_f32 vcc, v2, v4
-// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
+// SICI: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
+// VI: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x82,0x7c]
// TODO: Add tests for the rest of the instructions.
diff --git a/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt b/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
new file mode 100644
index 000000000000..f154e00ff51c
--- /dev/null
+++ b/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
@@ -0,0 +1,664 @@
+# RUN: llvm-mc --disassemble %s -triple powerpc64le-unknown-unknown -mcpu=pwr7 | FileCheck %s
+
+# FIXME: test b target
+
+# FIXME: test ba target
+
+# FIXME: test bl target
+
+# FIXME: test bla target
+
+# FIXME: test bc 4, 10, target
+
+# FIXME: test bca 4, 10, target
+
+# FIXME: test bcl 4, 10, target
+
+# FIXME: test bcla 4, 10, target
+
+# CHECK: bclr 4, 10, 3
+0x20 0x18 0x8a 0x4c
+
+# CHECK: bclr 4, 10
+0x20 0x00 0x8a 0x4c
+
+# CHECK: bclrl 4, 10, 3
+0x21 0x18 0x8a 0x4c
+
+# CHECK: bclrl 4, 10
+0x21 0x00 0x8a 0x4c
+
+# CHECK: bcctr 4, 10, 3
+0x20 0x1c 0x8a 0x4c
+
+# CHECK: bcctr 4, 10
+0x20 0x04 0x8a 0x4c
+
+# CHECK: bcctrl 4, 10, 3
+0x21 0x1c 0x8a 0x4c
+
+# CHECK: bcctrl 4, 10
+0x21 0x04 0x8a 0x4c
+
+# CHECK: crand 2, 3, 4
+0x02 0x22 0x43 0x4c
+
+# CHECK: crnand 2, 3, 4
+0xc2 0x21 0x43 0x4c
+
+# CHECK: cror 2, 3, 4
+0x82 0x23 0x43 0x4c
+
+# CHECK: crxor 2, 3, 4
+0x82 0x21 0x43 0x4c
+
+# CHECK: crnor 2, 3, 4
+0x42 0x20 0x43 0x4c
+
+# CHECK: creqv 2, 3, 4
+0x42 0x22 0x43 0x4c
+
+# CHECK: crandc 2, 3, 4
+0x02 0x21 0x43 0x4c
+
+# CHECK: crorc 2, 3, 4
+0x42 0x23 0x43 0x4c
+
+# CHECK: mcrf 2, 3
+0x00 0x00 0x0c 0x4d
+
+# CHECK: sc 1
+0x22 0x00 0x00 0x44
+
+# CHECK: sc
+0x02 0x00 0x00 0x44
+
+# CHECK: clrbhrb
+0x5c 0x03 0x00 0x7c
+
+# CHECK: mfbhrbe 9, 983
+0x5c 0xba 0x3e 0x7d
+
+# CHECK: rfebb 1
+0x24 0x09 0x00 0x4c
+
+# CHECK: lbz 2, 128(4)
+0x80 0x00 0x44 0x88
+
+# CHECK: lbzx 2, 3, 4
+0xae 0x20 0x43 0x7c
+
+# CHECK: lbzu 2, 128(4)
+0x80 0x00 0x44 0x8c
+
+# CHECK: lbzux 2, 3, 4
+0xee 0x20 0x43 0x7c
+
+# CHECK: lhz 2, 128(4)
+0x80 0x00 0x44 0xa0
+
+# CHECK: lhzx 2, 3, 4
+0x2e 0x22 0x43 0x7c
+
+# CHECK: lhzu 2, 128(4)
+0x80 0x00 0x44 0xa4
+
+# CHECK: lhzux 2, 3, 4
+0x6e 0x22 0x43 0x7c
+
+# CHECK: lha 2, 128(4)
+0x80 0x00 0x44 0xa8
+
+# CHECK: lhax 2, 3, 4
+0xae 0x22 0x43 0x7c
+
+# CHECK: lhau 2, 128(4)
+0x80 0x00 0x44 0xac
+
+# CHECK: lhaux 2, 3, 4
+0xee 0x22 0x43 0x7c
+
+# CHECK: lwz 2, 128(4)
+0x80 0x00 0x44 0x80
+
+# CHECK: lwzx 2, 3, 4
+0x2e 0x20 0x43 0x7c
+
+# CHECK: lwzu 2, 128(4)
+0x80 0x00 0x44 0x84
+
+# CHECK: lwzux 2, 3, 4
+0x6e 0x20 0x43 0x7c
+
+# CHECK: lwa 2, 128(4)
+0x82 0x00 0x44 0xe8
+
+# CHECK: lwax 2, 3, 4
+0xaa 0x22 0x43 0x7c
+
+# CHECK: lwaux 2, 3, 4
+0xea 0x22 0x43 0x7c
+
+# CHECK: ld 2, 128(4)
+0x80 0x00 0x44 0xe8
+
+# CHECK: ldx 2, 3, 4
+0x2a 0x20 0x43 0x7c
+
+# CHECK: ldu 2, 128(4)
+0x81 0x00 0x44 0xe8
+
+# CHECK: ldux 2, 3, 4
+0x6a 0x20 0x43 0x7c
+
+# CHECK: stb 2, 128(4)
+0x80 0x00 0x44 0x98
+
+# CHECK: stbx 2, 3, 4
+0xae 0x21 0x43 0x7c
+
+# CHECK: stbu 2, 128(4)
+0x80 0x00 0x44 0x9c
+
+# CHECK: stbux 2, 3, 4
+0xee 0x21 0x43 0x7c
+
+# CHECK: sth 2, 128(4)
+0x80 0x00 0x44 0xb0
+
+# CHECK: sthx 2, 3, 4
+0x2e 0x23 0x43 0x7c
+
+# CHECK: sthu 2, 128(4)
+0x80 0x00 0x44 0xb4
+
+# CHECK: sthux 2, 3, 4
+0x6e 0x23 0x43 0x7c
+
+# CHECK: stw 2, 128(4)
+0x80 0x00 0x44 0x90
+
+# CHECK: stwx 2, 3, 4
+0x2e 0x21 0x43 0x7c
+
+# CHECK: stwu 2, 128(4)
+0x80 0x00 0x44 0x94
+
+# CHECK: stwux 2, 3, 4
+0x6e 0x21 0x43 0x7c
+
+# CHECK: std 2, 128(4)
+0x80 0x00 0x44 0xf8
+
+# CHECK: stdx 2, 3, 4
+0x2a 0x21 0x43 0x7c
+
+# CHECK: stdu 2, 128(4)
+0x81 0x00 0x44 0xf8
+
+# CHECK: stdux 2, 3, 4
+0x6a 0x21 0x43 0x7c
+
+# CHECK: lhbrx 2, 3, 4
+0x2c 0x26 0x43 0x7c
+
+# CHECK: sthbrx 2, 3, 4
+0x2c 0x27 0x43 0x7c
+
+# CHECK: lwbrx 2, 3, 4
+0x2c 0x24 0x43 0x7c
+
+# CHECK: stwbrx 2, 3, 4
+0x2c 0x25 0x43 0x7c
+
+# CHECK: ldbrx 2, 3, 4
+0x28 0x24 0x43 0x7c
+
+# CHECK: stdbrx 2, 3, 4
+0x28 0x25 0x43 0x7c
+
+# CHECK: lmw 2, 128(1)
+0x80 0x00 0x41 0xb8
+
+# CHECK: stmw 2, 128(1)
+0x80 0x00 0x41 0xbc
+
+# CHECK: addi 2, 3, 128
+0x80 0x00 0x43 0x38
+
+# CHECK: addis 2, 3, 128
+0x80 0x00 0x43 0x3c
+
+# CHECK: add 2, 3, 4
+0x14 0x22 0x43 0x7c
+
+# CHECK: add. 2, 3, 4
+0x15 0x22 0x43 0x7c
+
+# CHECK: subf 2, 3, 4
+0x50 0x20 0x43 0x7c
+
+# CHECK: subf. 2, 3, 4
+0x51 0x20 0x43 0x7c
+
+# CHECK: addic 2, 3, 128
+0x80 0x00 0x43 0x30
+
+# CHECK: addic. 2, 3, 128
+0x80 0x00 0x43 0x34
+
+# CHECK: subfic 2, 3, 4
+0x04 0x00 0x43 0x20
+
+# CHECK: addc 2, 3, 4
+0x14 0x20 0x43 0x7c
+
+# CHECK: addc. 2, 3, 4
+0x15 0x20 0x43 0x7c
+
+# CHECK: subfc 2, 3, 4
+0x10 0x20 0x43 0x7c
+
+# CHECK: subfc. 2, 3, 4
+0x11 0x20 0x43 0x7c
+
+# CHECK: adde 2, 3, 4
+0x14 0x21 0x43 0x7c
+
+# CHECK: adde. 2, 3, 4
+0x15 0x21 0x43 0x7c
+
+# CHECK: subfe 2, 3, 4
+0x10 0x21 0x43 0x7c
+
+# CHECK: subfe. 2, 3, 4
+0x11 0x21 0x43 0x7c
+
+# CHECK: addme 2, 3
+0xd4 0x01 0x43 0x7c
+
+# CHECK: addme. 2, 3
+0xd5 0x01 0x43 0x7c
+
+# CHECK: subfme 2, 3
+0xd0 0x01 0x43 0x7c
+
+# CHECK: subfme. 2, 3
+0xd1 0x01 0x43 0x7c
+
+# CHECK: addze 2, 3
+0x94 0x01 0x43 0x7c
+
+# CHECK: addze. 2, 3
+0x95 0x01 0x43 0x7c
+
+# CHECK: subfze 2, 3
+0x90 0x01 0x43 0x7c
+
+# CHECK: subfze. 2, 3
+0x91 0x01 0x43 0x7c
+
+# CHECK: neg 2, 3
+0xd0 0x00 0x43 0x7c
+
+# CHECK: neg. 2, 3
+0xd1 0x00 0x43 0x7c
+
+# CHECK: mulli 2, 3, 128
+0x80 0x00 0x43 0x1c
+
+# CHECK: mulhw 2, 3, 4
+0x96 0x20 0x43 0x7c
+
+# CHECK: mulhw. 2, 3, 4
+0x97 0x20 0x43 0x7c
+
+# CHECK: mullw 2, 3, 4
+0xd6 0x21 0x43 0x7c
+
+# CHECK: mullw. 2, 3, 4
+0xd7 0x21 0x43 0x7c
+
+# CHECK: mulhwu 2, 3, 4
+0x16 0x20 0x43 0x7c
+
+# CHECK: mulhwu. 2, 3, 4
+0x17 0x20 0x43 0x7c
+
+# CHECK: divw 2, 3, 4
+0xd6 0x23 0x43 0x7c
+
+# CHECK: divw. 2, 3, 4
+0xd7 0x23 0x43 0x7c
+
+# CHECK: divwu 2, 3, 4
+0x96 0x23 0x43 0x7c
+
+# CHECK: divwu. 2, 3, 4
+0x97 0x23 0x43 0x7c
+
+# CHECK: divwe 2, 3, 4
+0x56 0x23 0x43 0x7c
+
+# CHECK: divwe. 2, 3, 4
+0x57 0x23 0x43 0x7c
+
+# CHECK: divweu 2, 3, 4
+0x16 0x23 0x43 0x7c
+
+# CHECK: divweu. 2, 3, 4
+0x17 0x23 0x43 0x7c
+
+# CHECK: mulld 2, 3, 4
+0xd2 0x21 0x43 0x7c
+
+# CHECK: mulld. 2, 3, 4
+0xd3 0x21 0x43 0x7c
+
+# CHECK: mulhd 2, 3, 4
+0x92 0x20 0x43 0x7c
+
+# CHECK: mulhd. 2, 3, 4
+0x93 0x20 0x43 0x7c
+
+# CHECK: mulhdu 2, 3, 4
+0x12 0x20 0x43 0x7c
+
+# CHECK: mulhdu. 2, 3, 4
+0x13 0x20 0x43 0x7c
+
+# CHECK: divd 2, 3, 4
+0xd2 0x23 0x43 0x7c
+
+# CHECK: divd. 2, 3, 4
+0xd3 0x23 0x43 0x7c
+
+# CHECK: divdu 2, 3, 4
+0x92 0x23 0x43 0x7c
+
+# CHECK: divdu. 2, 3, 4
+0x93 0x23 0x43 0x7c
+
+# CHECK: divde 2, 3, 4
+0x52 0x23 0x43 0x7c
+
+# CHECK: divde. 2, 3, 4
+0x53 0x23 0x43 0x7c
+
+# CHECK: divdeu 2, 3, 4
+0x12 0x23 0x43 0x7c
+
+# CHECK: divdeu. 2, 3, 4
+0x13 0x23 0x43 0x7c
+
+# CHECK: cmpdi 2, 3, 128
+0x80 0x00 0x23 0x2d
+
+# CHECK: cmpd 2, 3, 4
+0x00 0x20 0x23 0x7d
+
+# CHECK: cmpldi 2, 3, 128
+0x80 0x00 0x23 0x29
+
+# CHECK: cmpld 2, 3, 4
+0x40 0x20 0x23 0x7d
+
+# CHECK: cmpwi 2, 3, 128
+0x80 0x00 0x03 0x2d
+
+# CHECK: cmpw 2, 3, 4
+0x00 0x20 0x03 0x7d
+
+# CHECK: cmplwi 2, 3, 128
+0x80 0x00 0x03 0x29
+
+# CHECK: cmplw 2, 3, 4
+0x40 0x20 0x03 0x7d
+
+# CHECK: twllti 3, 4
+0x04 0x00 0x43 0x0c
+
+# CHECK: twllt 3, 4
+0x08 0x20 0x43 0x7c
+
+# CHECK: tdllti 3, 4
+0x04 0x00 0x43 0x08
+
+# CHECK: tdllt 3, 4
+0x88 0x20 0x43 0x7c
+
+# CHECK: isel 2, 3, 4, 5
+0x5e 0x21 0x43 0x7c
+
+# CHECK: andi. 2, 3, 128
+0x80 0x00 0x62 0x70
+
+# CHECK: andis. 2, 3, 128
+0x80 0x00 0x62 0x74
+
+# CHECK: ori 2, 3, 128
+0x80 0x00 0x62 0x60
+
+# CHECK: oris 2, 3, 128
+0x80 0x00 0x62 0x64
+
+# CHECK: xori 2, 3, 128
+0x80 0x00 0x62 0x68
+
+# CHECK: xoris 2, 3, 128
+0x80 0x00 0x62 0x6c
+
+# CHECK: and 2, 3, 4
+0x38 0x20 0x62 0x7c
+
+# CHECK: and. 2, 3, 4
+0x39 0x20 0x62 0x7c
+
+# CHECK: xor 2, 3, 4
+0x78 0x22 0x62 0x7c
+
+# CHECK: xor. 2, 3, 4
+0x79 0x22 0x62 0x7c
+
+# CHECK: nand 2, 3, 4
+0xb8 0x23 0x62 0x7c
+
+# CHECK: nand. 2, 3, 4
+0xb9 0x23 0x62 0x7c
+
+# CHECK: or 2, 3, 4
+0x78 0x23 0x62 0x7c
+
+# CHECK: or. 2, 3, 4
+0x79 0x23 0x62 0x7c
+
+# CHECK: nor 2, 3, 4
+0xf8 0x20 0x62 0x7c
+
+# CHECK: nor. 2, 3, 4
+0xf9 0x20 0x62 0x7c
+
+# CHECK: eqv 2, 3, 4
+0x38 0x22 0x62 0x7c
+
+# CHECK: eqv. 2, 3, 4
+0x39 0x22 0x62 0x7c
+
+# CHECK: andc 2, 3, 4
+0x78 0x20 0x62 0x7c
+
+# CHECK: andc. 2, 3, 4
+0x79 0x20 0x62 0x7c
+
+# CHECK: orc 2, 3, 4
+0x38 0x23 0x62 0x7c
+
+# CHECK: orc. 2, 3, 4
+0x39 0x23 0x62 0x7c
+
+# CHECK: extsb 2, 3
+0x74 0x07 0x62 0x7c
+
+# CHECK: extsb. 2, 3
+0x75 0x07 0x62 0x7c
+
+# CHECK: extsh 2, 3
+0x34 0x07 0x62 0x7c
+
+# CHECK: extsh. 2, 3
+0x35 0x07 0x62 0x7c
+
+# CHECK: cntlz 2, 3
+0x34 0x00 0x62 0x7c
+
+# CHECK: cntlz. 2, 3
+0x35 0x00 0x62 0x7c
+
+# CHECK: popcntw 2, 3
+0xf4 0x02 0x62 0x7c
+
+# CHECK: extsw 2, 3
+0xb4 0x07 0x62 0x7c
+
+# CHECK: extsw. 2, 3
+0xb5 0x07 0x62 0x7c
+
+# CHECK: cntlzd 2, 3
+0x74 0x00 0x62 0x7c
+
+# CHECK: cntlzd. 2, 3
+0x75 0x00 0x62 0x7c
+
+# CHECK: popcntd 2, 3
+0xf4 0x03 0x62 0x7c
+
+# CHECK: bpermd 2, 3, 4
+0xf8 0x21 0x62 0x7c
+
+# CHECK: cmpb 7, 21, 4
+0xf8 0x23 0xa7 0x7e
+
+# CHECK: rlwinm 2, 3, 4, 5, 6
+0x4c 0x21 0x62 0x54
+
+# CHECK: rlwinm. 2, 3, 4, 5, 6
+0x4d 0x21 0x62 0x54
+
+# CHECK: rlwnm 2, 3, 4, 5, 6
+0x4c 0x21 0x62 0x5c
+
+# CHECK: rlwnm. 2, 3, 4, 5, 6
+0x4d 0x21 0x62 0x5c
+
+# CHECK: rlwimi 2, 3, 4, 5, 6
+0x4c 0x21 0x62 0x50
+
+# CHECK: rlwimi. 2, 3, 4, 5, 6
+0x4d 0x21 0x62 0x50
+
+# CHECK: rldicl 2, 3, 4, 5
+0x40 0x21 0x62 0x78
+
+# CHECK: rldicl. 2, 3, 4, 5
+0x41 0x21 0x62 0x78
+
+# CHECK: rldicr 2, 3, 4, 5
+0x44 0x21 0x62 0x78
+
+# CHECK: rldicr. 2, 3, 4, 5
+0x45 0x21 0x62 0x78
+
+# CHECK: rldic 2, 3, 4, 5
+0x48 0x21 0x62 0x78
+
+# CHECK: rldic. 2, 3, 4, 5
+0x49 0x21 0x62 0x78
+
+# CHECK: rldcl 2, 3, 4, 5
+0x50 0x21 0x62 0x78
+
+# CHECK: rldcl. 2, 3, 4, 5
+0x51 0x21 0x62 0x78
+
+# CHECK: rldcr 2, 3, 4, 5
+0x52 0x21 0x62 0x78
+
+# CHECK: rldcr. 2, 3, 4, 5
+0x53 0x21 0x62 0x78
+
+# CHECK: rldimi 2, 3, 4, 5
+0x4c 0x21 0x62 0x78
+
+# CHECK: rldimi. 2, 3, 4, 5
+0x4d 0x21 0x62 0x78
+
+# CHECK: slw 2, 3, 4
+0x30 0x20 0x62 0x7c
+
+# CHECK: slw. 2, 3, 4
+0x31 0x20 0x62 0x7c
+
+# CHECK: srw 2, 3, 4
+0x30 0x24 0x62 0x7c
+
+# CHECK: srw. 2, 3, 4
+0x31 0x24 0x62 0x7c
+
+# CHECK: srawi 2, 3, 4
+0x70 0x26 0x62 0x7c
+
+# CHECK: srawi. 2, 3, 4
+0x71 0x26 0x62 0x7c
+
+# CHECK: sraw 2, 3, 4
+0x30 0x26 0x62 0x7c
+
+# CHECK: sraw. 2, 3, 4
+0x31 0x26 0x62 0x7c
+
+# CHECK: sld 2, 3, 4
+0x36 0x20 0x62 0x7c
+
+# CHECK: sld. 2, 3, 4
+0x37 0x20 0x62 0x7c
+
+# CHECK: srd 2, 3, 4
+0x36 0x24 0x62 0x7c
+
+# CHECK: srd. 2, 3, 4
+0x37 0x24 0x62 0x7c
+
+# CHECK: sradi 2, 3, 4
+0x74 0x26 0x62 0x7c
+
+# CHECK: sradi. 2, 3, 4
+0x75 0x26 0x62 0x7c
+
+# CHECK: srad 2, 3, 4
+0x34 0x26 0x62 0x7c
+
+# CHECK: srad. 2, 3, 4
+0x35 0x26 0x62 0x7c
+
+# CHECK: mtspr 600, 2
+0xa6 0x93 0x58 0x7c
+
+# CHECK: mfspr 2, 600
+0xa6 0x92 0x58 0x7c
+
+# CHECK: mtcrf 123, 2
+0x20 0xb1 0x47 0x7c
+
+# CHECK: mfcr 2
+0x26 0x00 0x40 0x7c
+
+# CHECK: mtocrf 16, 2
+0x20 0x01 0x51 0x7c
+
+# CHECK: mfocrf 16, 8
+0x26 0x80 0x10 0x7e
+
+# CHECK: mtsrin 10, 12
+0xe4 0x61 0x40 0x7d
+# CHECK: mfsrin 10, 12
+0x26 0x65 0x40 0x7d
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 30fe6c8b9b15..6fde42bd898d 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -665,3 +665,17 @@ frstor dword ptr [eax]
// CHECK: cmpnless %xmm1, %xmm0
cmpnless xmm0, xmm1
+
+insb
+insw
+insd
+// CHECK: insb %dx, %es:(%rdi)
+// CHECK: insw %dx, %es:(%rdi)
+// CHECK: insl %dx, %es:(%rdi)
+
+outsb
+outsw
+outsd
+// CHECK: outsb (%rsi), %dx
+// CHECK: outsw (%rsi), %dx
+// CHECK: outsl (%rsi), %dx
diff --git a/test/Object/archive-extract.test b/test/Object/archive-extract.test
index a77adf2cabbd..50372d530d88 100644
--- a/test/Object/archive-extract.test
+++ b/test/Object/archive-extract.test
@@ -53,4 +53,4 @@ RUN: llvm-ar p %p/Inputs/thin.a evenlen | FileCheck %s --check-prefix=EVENLEN
EVENLEN: evenlen
RUN: not llvm-ar p %p/Inputs/thin-path.a t/test2.o | FileCheck %s --check-prefix=MISSING
-MISSING: No such file or directory.
+MISSING: {{N|n}}o such file or directory.
diff --git a/test/Transforms/GVN/pr24397.ll b/test/Transforms/GVN/pr24397.ll
new file mode 100644
index 000000000000..db43964e2e4c
--- /dev/null
+++ b/test/Transforms/GVN/pr24397.ll
@@ -0,0 +1,18 @@
+; RUN: opt -basicaa -gvn -disable-output < %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @foo(i64** %arrayidx) { ; run with -disable-output and no FileCheck: passing means opt -basicaa -gvn completes
+entry:
+ %p = load i64*, i64** %arrayidx, align 8 ; reads the slot as a pointer (i64*)
+ %cmpnull = icmp eq i64* %p, null ; result is never used
+ br label %BB2
+
+entry2: ; No predecessors!
+ br label %BB2
+
+BB2: ; preds = %entry2, %entry
+ %bc = bitcast i64** %arrayidx to i64*
+ %load = load i64, i64* %bc, align 8 ; reads the same slot again, this time as an integer (i64)
+ ret i64 %load
+}
diff --git a/test/Transforms/InstCombine/pr24354.ll b/test/Transforms/InstCombine/pr24354.ll
new file mode 100644
index 000000000000..3b36fd1b74e3
--- /dev/null
+++ b/test/Transforms/InstCombine/pr24354.ll
@@ -0,0 +1,33 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; This used to crash opt
+
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+@a = common global i16 0, align 2
+@d = common global i32 0, align 4
+
+define void @fn3() { ; the only expectation is that @fn3 shows up in the output, i.e. opt -instcombine finishes
+; CHECK: @fn3
+bb:
+ %tmp = load i32, i32* @c, align 4
+ %tmp1 = icmp eq i32 %tmp, 0
+ br i1 %tmp1, label %bb2, label %bb6
+
+bb2: ; preds = %bb
+ %tmp3 = load i32, i32* @b, align 4
+ %tmp.i = add nsw i32 255, %tmp3
+ %tmp5 = icmp ugt i32 %tmp.i, 254
+ br label %bb6
+
+bb6: ; preds = %bb, %bb2
+ %tmp7 = phi i1 [ true, %bb ], [ %tmp5, %bb2 ] ; i1 phi mixing a constant with a compare result
+ %tmp8 = zext i1 %tmp7 to i32
+ %tmp10 = icmp eq i32 %tmp8, 0 ; true exactly when %tmp7 is false
+ %tmp12 = load i16, i16* @a, align 2
+ %tmp14 = icmp ne i16 %tmp12, 0
+ %tmp16 = select i1 %tmp10, i1 false, i1 %tmp14 ; false when %tmp10 is true, otherwise %tmp14
+ %tmp17 = zext i1 %tmp16 to i32
+ store i32 %tmp17, i32* @d, align 4
+ ret void
+}
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 727eb4ebb4c8..af18b4cfbdd1 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -150,3 +150,14 @@ entry:
ret <4 x float> undef
}
+define <8 x i32> @pr24458(<8 x float> %n) {
+; CHECK-LABEL: @pr24458
+ %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer ; true for unordered or not-equal lanes
+ %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer ; true for unordered or equal lanes
+ %notequal_b_load__to_boolvec.i = sext <8 x i1> %notequal_b_load_.i to <8 x i32>
+ %equal_a_load72__to_boolvec.i = sext <8 x i1> %equal_a_load72_.i to <8 x i32>
+ %wrong = or <8 x i32> %notequal_b_load__to_boolvec.i, %equal_a_load72__to_boolvec.i ; every lane satisfies une or ueq, so each element must be -1
+ ret <8 x i32> %wrong
+; CHECK-NEXT: ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
diff --git a/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll b/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll
index 7e391aba3045..441bc1adca7e 100644
--- a/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll
+++ b/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll
@@ -36,3 +36,13 @@ define i32 @test3(i32 %a, float %b) {
; CHECK-LABEL: @test3(
; CHECK: ret i32 %a
}
+
+define i8 @test4(<8 x i8> %V) {
+ %add = add <8 x i8> %V, bitcast (double 0x319BEB8FD172E36 to <8 x i8>) ; second operand is a constant expression: a double bitcast to <8 x i8>
+ %extract = extractelement <8 x i8> %add, i32 6 ; the check lines expect this extract to still be present in the output
+ ret i8 %extract
+; CHECK-LABEL: @test4(
+; CHECK: %[[add:.*]] = add <8 x i8> %V, bitcast (<1 x double> <double 0x319BEB8FD172E36> to <8 x i8>)
+; CHECK-NEXT: %[[extract:.*]] = extractelement <8 x i8> %[[add]], i32 6
+; CHECK-NEXT: ret i8 %[[extract]]
+}
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 7c8955b28fa2..ad2794167a5e 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1195,20 +1195,24 @@ entry:
%a = alloca <{ i1 }>, align 8
%b = alloca <{ i1 }>, align 8
; CHECK: %[[a:.*]] = alloca i8, align 8
+; CHECK-NEXT: %[[b:.*]] = alloca i8, align 8
%b.i1 = bitcast <{ i1 }>* %b to i1*
store i1 %x, i1* %b.i1, align 8
%b.i8 = bitcast <{ i1 }>* %b to i8*
%foo = load i8, i8* %b.i8, align 1
-; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
-; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
-; CHECK-NEXT: {{.*}} = load i8, i8* %[[a]], align 8
+; CHECK-NEXT: %[[b_cast:.*]] = bitcast i8* %[[b]] to i1*
+; CHECK-NEXT: store i1 %x, i1* %[[b_cast]], align 8
+; CHECK-NEXT: {{.*}} = load i8, i8* %[[b]], align 8
%a.i8 = bitcast <{ i1 }>* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
%bar = load i8, i8* %a.i8, align 1
%a.i1 = getelementptr inbounds <{ i1 }>, <{ i1 }>* %a, i32 0, i32 0
%baz = load i1, i1* %a.i1, align 1
+; CHECK-NEXT: %[[copy:.*]] = load i8, i8* %[[b]], align 8
+; CHECK-NEXT: store i8 %[[copy]], i8* %[[a]], align 8
+; CHECK-NEXT: {{.*}} = load i8, i8* %[[a]], align 8
; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1*
; CHECK-NEXT: {{.*}} = load i1, i1* %[[a_cast]], align 8
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
index b5a04ca8e64a..4de7bfcb898d 100644
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -112,3 +112,126 @@ entry:
; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64
; CHECK-NEXT: ret i64 %[[ret]]
}
+
+define i64 @PR14132(i1 %flag) {
+; CHECK-LABEL: @PR14132(
+; Here we form a PHI-node by promoting the pointer alloca first, and then in
+; order to promote the other two allocas, we speculate the load of the
+; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
+; alloca. While this is a bit dubious, we were asserting on trying to
+; rewrite it. The trick is that the code using the value may carefully take
+; steps to only use the not-undef bits, and so we need to at least loosely
+; support this. This test is particularly interesting because how we handle
+; a load of an i64 from an i8 alloca is dependent on endianness.
+entry:
+ %a = alloca i64, align 8
+ %b = alloca i8, align 8 ; one byte wide, yet reachable through %ptr and read with an i64 load in if.end
+ %ptr = alloca i64*, align 8
+; CHECK-NOT: alloca
+
+ %ptr.cast = bitcast i64** %ptr to i8**
+ store i64 0, i64* %a
+ store i8 1, i8* %b
+ store i64* %a, i64** %ptr
+ br i1 %flag, label %if.then, label %if.end
+
+if.then:
+ store i8* %b, i8** %ptr.cast ; overwrites %ptr with the address of %b through the i8** view
+ br label %if.end
+; CHECK-NOT: store
+; CHECK: %[[ext:.*]] = zext i8 1 to i64
+; CHECK: %[[shift:.*]] = shl i64 %[[ext]], 56
+
+if.end:
+ %tmp = load i64*, i64** %ptr
+ %result = load i64, i64* %tmp ; 8-byte read; on the %if.then path the pointer refers to the 1-byte %b
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = phi i64 [ %[[shift]], %if.then ], [ 0, %entry ]
+
+ ret i64 %result
+; CHECK-NEXT: ret i64 %[[result]]
+}
+
+declare void @f(i64 %x, i32 %y)
+
+define void @test3() {
+; CHECK-LABEL: @test3(
+;
+; This is a test that specifically exercises the big-endian lowering because it
+; ends up splitting a 64-bit integer into two smaller integers and has a number
+; of tricky aspects (the i24 type) that make that hard. Historically, SROA
+; would miscompile this by either dropping a most significant byte or least
+; significant byte due to shrinking the [4,8) slice to an i24, or by failing to
+; move the bytes around correctly.
+;
+; The magical number 34494054408 is used because it has bits set in various
+; bytes so that it is clear if those bytes fail to be propagated.
+;
+; If you're debugging this, rather than using the direct magical numbers, run
+; the IR through '-sroa -instcombine'. With '-instcombine' these will be
+; constant folded, and if the i64 doesn't round-trip correctly, you've found
+; a bug!
+;
+entry:
+ %a = alloca { i32, i24 }, align 4
+; CHECK-NOT: alloca
+
+ %tmp0 = bitcast { i32, i24 }* %a to i64*
+ store i64 34494054408, i64* %tmp0 ; 34494054408 = 0x0000000808018008: upper 32 bits are 8, lower 32 bits are 134316040
+ %tmp1 = load i64, i64* %tmp0, align 4
+ %tmp2 = bitcast { i32, i24 }* %a to i32*
+ %tmp3 = load i32, i32* %tmp2, align 4 ; i32 read at offset 0 of %a; the check lines expect the constant 8 here
+; CHECK: %[[HI_EXT:.*]] = zext i32 134316040 to i64
+; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
+; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
+; CHECK: %[[LO_EXT:.*]] = zext i32 8 to i64
+; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
+; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
+; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
+
+ call void @f(i64 %tmp1, i32 %tmp3)
+; CHECK: call void @f(i64 %[[LO_MERGE]], i32 8)
+ ret void
+; CHECK: ret void
+}
+
+define void @test4() {
+; CHECK-LABEL: @test4
+;
+; Much like @test3, this is specifically testing big-endian management of data.
+; Also similarly, it uses constants with particular bits set to help track
+; whether values are corrupted, and can be easily evaluated by running through
+; -instcombine to see that the i64 round-trips.
+;
+entry:
+ %a = alloca { i32, i24 }, align 4
+ %a2 = alloca i64, align 4
+; CHECK-NOT: alloca
+
+ store i64 34494054408, i64* %a2 ; 34494054408 = 0x0000000808018008
+ %tmp0 = bitcast { i32, i24 }* %a to i8*
+ %tmp1 = bitcast i64* %a2 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp0, i8* %tmp1, i64 8, i32 4, i1 false) ; copies 8 bytes from %a2 into %a
+; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32
+; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32
+; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32
+
+ %tmp2 = bitcast { i32, i24 }* %a to i64*
+ %tmp3 = load i64, i64* %tmp2, align 4
+ %tmp4 = bitcast { i32, i24 }* %a to i32*
+ %tmp5 = load i32, i32* %tmp4, align 4 ; i32 read at offset 0 of %a; expected to equal the constant shifted right by 32 (LO_START)
+; CHECK: %[[HI_EXT:.*]] = zext i32 %[[HI_START]] to i64
+; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
+; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
+; CHECK: %[[LO_EXT:.*]] = zext i32 %[[LO_START]] to i64
+; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
+; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
+; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
+
+ call void @f(i64 %tmp3, i32 %tmp5)
+; CHECK: call void @f(i64 %[[LO_MERGE]], i32 %[[LO_START]])
+ ret void
+; CHECK: ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index e97bd66d052a..fb76548b1d18 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -438,26 +438,26 @@ define i64 @PR14132(i1 %flag) {
; steps to only use the not-undef bits, and so we need to at least loosely
; support this..
entry:
- %a = alloca i64
- %b = alloca i8
- %ptr = alloca i64*
+ %a = alloca i64, align 8
+ %b = alloca i8, align 8
+ %ptr = alloca i64*, align 8
; CHECK-NOT: alloca
%ptr.cast = bitcast i64** %ptr to i8**
- store i64 0, i64* %a
- store i8 1, i8* %b
- store i64* %a, i64** %ptr
+ store i64 0, i64* %a, align 8
+ store i8 1, i8* %b, align 8
+ store i64* %a, i64** %ptr, align 8
br i1 %flag, label %if.then, label %if.end
if.then:
- store i8* %b, i8** %ptr.cast
+ store i8* %b, i8** %ptr.cast, align 8
br label %if.end
; CHECK-NOT: store
; CHECK: %[[ext:.*]] = zext i8 1 to i64
if.end:
- %tmp = load i64*, i64** %ptr
- %result = load i64, i64* %tmp
+ %tmp = load i64*, i64** %ptr, align 8
+ %result = load i64, i64* %tmp, align 8
; CHECK-NOT: load
; CHECK: %[[result:.*]] = phi i64 [ %[[ext]], %if.then ], [ 0, %entry ]
diff --git a/test/Transforms/Scalarizer/cache-bug.ll b/test/Transforms/Scalarizer/cache-bug.ll
new file mode 100644
index 000000000000..f8c2d100d59a
--- /dev/null
+++ b/test/Transforms/Scalarizer/cache-bug.ll
@@ -0,0 +1,30 @@
+; RUN: opt -scalarizer -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+
+; Check that vector element 1 is scalarized correctly from a chain of
+; insertelement instructions
+define void @func(i32 %x) {
+; CHECK-LABEL: @func(
+; CHECK-NOT: phi i32 [ %x, %entry ], [ %inc.pos.y, %loop ]
+; CHECK: phi i32 [ %inc, %entry ], [ %inc.pos.y, %loop ]
+; CHECK: ret void
+entry:
+ %vecinit = insertelement <2 x i32> <i32 0, i32 0>, i32 %x, i32 1 ; element 1 = %x
+ %inc = add i32 %x, 1
+ %0 = insertelement <2 x i32> %vecinit, i32 %inc, i32 1 ; element 1 overwritten with %inc, so the scalar phi must start from %inc, not %x
+ br label %loop
+
+loop:
+ %pos = phi <2 x i32> [ %0, %entry ], [ %new.pos.y, %loop ]
+ %i = phi i32 [ 0, %entry ], [ %new.i, %loop ]
+ %pos.y = extractelement <2 x i32> %pos, i32 1 ; only element 1 of %pos is ever read
+ %inc.pos.y = add i32 %pos.y, 1
+ %new.pos.y = insertelement <2 x i32> %pos, i32 %inc.pos.y, i32 1
+ %new.i = add i32 %i, 1
+ %cmp2 = icmp slt i32 %new.i, 1
+ br i1 %cmp2, label %loop, label %exit
+
+exit:
+ ret void
+}