diff options
Diffstat (limited to 'test/CodeGen/SystemZ')
385 files changed, 23574 insertions, 3767 deletions
diff --git a/test/CodeGen/SystemZ/Large/branch-range-01.py b/test/CodeGen/SystemZ/Large/branch-range-01.py new file mode 100644 index 0000000000000..552c9ca0ea851 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-01.py @@ -0,0 +1,105 @@ +# Test normal conditional branches in cases where the sheer number of +# instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffd8 bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 8 bytes if it uses a short branch +# and 10 if it uses a long one. The ones before "main:" have to take the branch +# length into account -- which is 4 bytes for short branches -- so the final +# (0x28 - 4) / 8 == 4 blocks can use short branches. The ones after "main:" +# do not, so the first 0x28 / 8 == 5 can use short branches. However, +# the conservative algorithm we use makes one branch unnecessarily long +# on each side. +# +# CHECK: c %r4, 0(%r3) +# CHECK: jge [[LABEL:\.L[^ ]*]] +# CHECK: c %r4, 4(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 8(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 12(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 16(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 20(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 24(%r3) +# CHECK: j{{g?}}e [[LABEL]] +# CHECK: c %r4, 28(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 32(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 36(%r3) +# CHECK: je [[LABEL]] +# ...main goes here... 
+# CHECK: c %r4, 100(%r3) +# CHECK: je [[LABEL:\.L[^ ]*]] +# CHECK: c %r4, 104(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 108(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 112(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 116(%r3) +# CHECK: j{{g?}}e [[LABEL]] +# CHECK: c %r4, 120(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 124(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 128(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 132(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 136(%r3) +# CHECK: jge [[LABEL]] + +branch_blocks = 10 +main_size = 0xffd8 + +print 'define void @f1(i8 *%base, i32 *%stop, i32 %limit) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bstop%d = getelementptr i32 *%%stop, i64 %d' % (i, i) + print ' %%bcur%d = load volatile i32 *%%bstop%d' % (i, i) + print ' %%btest%d = icmp eq i32 %%limit, %%bcur%d' % (i, i) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i + 25) + print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i) + print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-02.py b/test/CodeGen/SystemZ/Large/branch-range-02.py new file mode 100644 index 0000000000000..0b21ced99a1f8 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-02.py @@ -0,0 +1,82 @@ +# Test normal conditional branches in cases where block alignments 
cause +# some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu -align-all-blocks=8 | FileCheck %s + +# Construct: +# +# b0: +# conditional branch to end +# ... +# b<N>: +# conditional branch to end +# b<N+1>: +# conditional branch to b0 +# ... +# b<2*N>: +# conditional branch to b0 +# end: +# +# with N == 256 + 4. The -align-all-blocks=8 option ensures that all blocks +# are 256 bytes in size. The first 4 blocks and the last 4 blocks are then +# out of range. +# +# CHECK: c %r4, 0(%r3) +# CHECK: jge [[LABEL:\.L[^ ]*]] +# CHECK: c %r4, 4(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 8(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 12(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 16(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 20(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 24(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 28(%r3) +# CHECK: je [[LABEL]] +# ...lots of other blocks... +# CHECK: c %r4, 1004(%r3) +# CHECK: je [[LABEL:\.L[^ ]*]] +# CHECK: c %r4, 1008(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 1012(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 1016(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 1020(%r3) +# CHECK: je [[LABEL]] +# CHECK: c %r4, 1024(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 1028(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 1032(%r3) +# CHECK: jge [[LABEL]] +# CHECK: c %r4, 1036(%r3) +# CHECK: jge [[LABEL]] + +blocks = 256 + 4 + +print 'define void @f1(i8 *%base, i32 *%stop, i32 %limit) {' +print 'entry:' +print ' br label %b0' +print '' + +a, b = 1, 1 +for i in xrange(blocks): + a, b = b, a + b + value = a % 256 + next = 'b%d' % (i + 1) if i + 1 < blocks else 'end' + other = 'end' if 2 * i < blocks else 'b0' + print 'b%d:' % i + print ' store volatile i8 %d, i8 *%%base' % value + print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i) + print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i) + print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i) + print ' br i1 %%atest%d, label %%%s, label 
%%%s' % (i, other, next) + +print '' +print '%s:' % next +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-03.py b/test/CodeGen/SystemZ/Large/branch-range-03.py new file mode 100644 index 0000000000000..75cdf247c6f3d --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-03.py @@ -0,0 +1,107 @@ +# Test 32-bit COMPARE AND BRANCH in cases where the sheer number of +# instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffcc bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 12 bytes if it uses a short +# branch and 14 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x34 - 6) / 12 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks +# can use short branches. +# +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cr %r4, [[REG]] +# CHECK: jge [[LABEL:\.L[^ ]*]] +# CHECK: lb [[REG:%r[0-5]]], 1(%r3) +# CHECK: cr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 2(%r3) +# CHECK: cr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 3(%r3) +# CHECK: cr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 4(%r3) +# CHECK: cr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 5(%r3) +# CHECK: crje %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 6(%r3) +# CHECK: crje %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 7(%r3) +# CHECK: crje %r4, [[REG]], [[LABEL]] +# ...main goes here... 
+# CHECK: lb [[REG:%r[0-5]]], 25(%r3) +# CHECK: crje %r4, [[REG]], [[LABEL:\.L[^ ]*]] +# CHECK: lb [[REG:%r[0-5]]], 26(%r3) +# CHECK: crje %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 27(%r3) +# CHECK: crje %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 28(%r3) +# CHECK: crje %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 29(%r3) +# CHECK: cr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 30(%r3) +# CHECK: cr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 31(%r3) +# CHECK: cr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 32(%r3) +# CHECK: cr %r4, [[REG]] +# CHECK: jge [[LABEL]] + +branch_blocks = 8 +main_size = 0xffcc + +print 'define void @f1(i8 *%base, i8 *%stop, i32 %limit) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) + print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i) + print ' %%btest%d = icmp eq i32 %%limit, %%bext%d' % (i, i) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) + print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i) + print ' %%atest%d = icmp eq i32 %%limit, %%aext%d' % (i, i) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git 
a/test/CodeGen/SystemZ/Large/branch-range-04.py b/test/CodeGen/SystemZ/Large/branch-range-04.py new file mode 100644 index 0000000000000..3ae3ae9c37f7d --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-04.py @@ -0,0 +1,111 @@ +# Test 64-bit COMPARE AND BRANCH in cases where the sheer number of +# instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffcc bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 12 bytes if it uses a short +# branch and 16 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x34 - 6) / 12 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks +# can use short branches. The conservative algorithm we use makes +# one of the forward branches unnecessarily long, as noted in the +# check output below. +# +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL:\.L[^ ]*]] +# CHECK: lgb [[REG:%r[0-5]]], 1(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 2(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 3(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 4(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# ...as mentioned above, the next one could be a CGRJE instead... +# CHECK: lgb [[REG:%r[0-5]]], 5(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 6(%r3) +# CHECK: cgrje %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 7(%r3) +# CHECK: cgrje %r4, [[REG]], [[LABEL]] +# ...main goes here... 
+# CHECK: lgb [[REG:%r[0-5]]], 25(%r3) +# CHECK: cgrje %r4, [[REG]], [[LABEL:\.L[^ ]*]] +# CHECK: lgb [[REG:%r[0-5]]], 26(%r3) +# CHECK: cgrje %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 27(%r3) +# CHECK: cgrje %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 28(%r3) +# CHECK: cgrje %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 29(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 30(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 31(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 32(%r3) +# CHECK: cgr %r4, [[REG]] +# CHECK: jge [[LABEL]] + +branch_blocks = 8 +main_size = 0xffcc + +print 'define void @f1(i8 *%base, i8 *%stop, i64 %limit) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) + print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i) + print ' %%btest%d = icmp eq i64 %%limit, %%bext%d' % (i, i) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) + print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i) + print ' %%atest%d = icmp eq i64 %%limit, %%aext%d' % (i, i) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git 
a/test/CodeGen/SystemZ/Large/branch-range-05.py b/test/CodeGen/SystemZ/Large/branch-range-05.py new file mode 100644 index 0000000000000..6928b8fc21d6c --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-05.py @@ -0,0 +1,109 @@ +# Test 32-bit COMPARE IMMEDIATE AND BRANCH in cases where the sheer number of +# instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffcc bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 12 bytes if it uses a short +# branch and 16 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x34 - 6) / 12 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks +# can use short branches. The conservative algorithm we use makes +# one of the forward branches unnecessarily long, as noted in the +# check output below. +# +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 50 +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 51 +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 52 +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 53 +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 54 +# CHECK: jgl [[LABEL]] +# ...as mentioned above, the next one could be a CIJL instead... +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 55 +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cijl [[REG]], 56, [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cijl [[REG]], 57, [[LABEL]] +# ...main goes here... 
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cijl [[REG]], 100, [[LABEL:\.L[^ ]*]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cijl [[REG]], 101, [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cijl [[REG]], 102, [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cijl [[REG]], 103, [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 104 +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 105 +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 106 +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: chi [[REG]], 107 +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffcc + +print 'define void @f1(i8 *%base, i8 *%stop) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bcur%d = load volatile i8 *%%stop' % i + print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i) + print ' %%btest%d = icmp slt i32 %%bext%d, %d' % (i, i, i + 50) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%acur%d = load volatile i8 *%%stop' % i + print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i) + print ' %%atest%d = icmp slt i32 %%aext%d, %d' % (i, i, i + 100) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-06.py b/test/CodeGen/SystemZ/Large/branch-range-06.py new file mode 100644 index 0000000000000..aabc72fa6ec81 --- /dev/null +++ 
b/test/CodeGen/SystemZ/Large/branch-range-06.py @@ -0,0 +1,109 @@ +# Test 64-bit COMPARE IMMEDIATE AND BRANCH in cases where the sheer number of +# instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffcc bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 12 bytes if it uses a short +# branch and 16 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x34 - 6) / 12 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks +# can use short branches. The conservative algorithm we use makes +# one of the forward branches unnecessarily long, as noted in the +# check output below. +# +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 50 +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 51 +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 52 +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 53 +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 54 +# CHECK: jgl [[LABEL]] +# ...as mentioned above, the next one could be a CGIJL instead... +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 55 +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cgijl [[REG]], 56, [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cgijl [[REG]], 57, [[LABEL]] +# ...main goes here... 
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cgijl [[REG]], 100, [[LABEL:\.L[^ ]*]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cgijl [[REG]], 101, [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cgijl [[REG]], 102, [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cgijl [[REG]], 103, [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 104 +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 105 +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 106 +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: cghi [[REG]], 107 +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffcc + +print 'define void @f1(i8 *%base, i8 *%stop) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bcur%d = load volatile i8 *%%stop' % i + print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i) + print ' %%btest%d = icmp slt i64 %%bext%d, %d' % (i, i, i + 50) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%acur%d = load volatile i8 *%%stop' % i + print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i) + print ' %%atest%d = icmp slt i64 %%aext%d, %d' % (i, i, i + 100) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-07.py b/test/CodeGen/SystemZ/Large/branch-range-07.py new file mode 100644 index 0000000000000..90c442092e827 --- /dev/null +++ 
b/test/CodeGen/SystemZ/Large/branch-range-07.py @@ -0,0 +1,68 @@ +# Test 32-bit BRANCH RELATIVE ON COUNT in cases where some branches are out +# of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# loopN: +# load of countN +# ... +# loop0: +# 0xffd8 bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# decrement of countN +# conditional branch to loopN +# afterN: +# +# Each load occupies 4 bytes. Each decrement and branch occupies 4 +# bytes if BRCT can be used, otherwise it occupies 10 bytes (AHI + BRCL). +# This means that loop 6 contains 5 * 4 + 0xffd8 + 5 * 4 == 0x10000 bytes +# and is therefore (just) in range. Loop 7 is out of range. +# +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: ahi {{%r[0-9]+}}, -1 +# CHECK: jglh +# CHECK: ahi {{%r[0-9]+}}, -1 +# CHECK: jglh + +branch_blocks = 8 +main_size = 0xffd8 + +print 'define void @f1(i8 *%base, i32 *%counts) {' +print 'entry:' + +for i in xrange(branch_blocks - 1, -1, -1): + print ' %%countptr%d = getelementptr i32 *%%counts, i64 %d' % (i, i) + print ' %%initcount%d = load i32 *%%countptr%d' % (i, i) + print ' br label %%loop%d' % i + + print 'loop%d:' % i + block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1) + block2 = 'loop0' if i == 0 else 'after%d' % (i - 1) + print (' %%count%d = phi i32 [ %%initcount%d, %%%s ],' + ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2)) + +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%nextcount%d = add i32 %%count%d, -1' % (i, i) + print ' %%test%d = icmp ne i32 %%nextcount%d, 0' % (i, i) + print ' br i1 %%test%d, label %%loop%d, 
label %%after%d' % (i, i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-08.py b/test/CodeGen/SystemZ/Large/branch-range-08.py new file mode 100644 index 0000000000000..ac1b1370a3e3d --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-08.py @@ -0,0 +1,69 @@ +# Test 64-bit BRANCH RELATIVE ON COUNT in cases where some branches are out +# of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# loopN: +# load of countN +# ... +# loop0: +# 0xffd8 bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# decrement of countN +# conditional branch to loopN +# afterN: +# +# Each load occupies 6 bytes. Each decrement and branch occupies 4 +# bytes if BRCTG can be used, otherwise it occupies 10 bytes (AGHI + BRCL). +# This means that loop 5 contains 4 * 6 + 0xffd8 + 4 * 4 == 0x10000 bytes +# and is therefore (just) in range. Loop 6 is out of range. +# +# CHECK: brctg {{%r[0-9]+}} +# CHECK: brctg {{%r[0-9]+}} +# CHECK: brctg {{%r[0-9]+}} +# CHECK: brctg {{%r[0-9]+}} +# CHECK: brctg {{%r[0-9]+}} +# CHECK: aghi {{%r[0-9]+}}, -1 +# CHECK: jglh +# CHECK: aghi {{%r[0-9]+}}, -1 +# CHECK: jglh +# CHECK: aghi {{%r[0-9]+}}, -1 +# CHECK: jglh + +branch_blocks = 8 +main_size = 0xffd8 + +print 'define void @f1(i8 *%base, i64 *%counts) {' +print 'entry:' + +for i in xrange(branch_blocks - 1, -1, -1): + print ' %%countptr%d = getelementptr i64 *%%counts, i64 %d' % (i, i) + print ' %%initcount%d = load i64 *%%countptr%d' % (i, i) + print ' br label %%loop%d' % i + + print 'loop%d:' % i + block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1) + block2 = 'loop0' if i == 0 else 'after%d' % (i - 1) + print (' %%count%d = phi i64 [ %%initcount%d, %%%s ],' + ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2)) + +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' 
%%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%nextcount%d = add i64 %%count%d, -1' % (i, i) + print ' %%test%d = icmp ne i64 %%nextcount%d, 0' % (i, i) + print ' br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-09.py b/test/CodeGen/SystemZ/Large/branch-range-09.py new file mode 100644 index 0000000000000..b3fd81324dab9 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-09.py @@ -0,0 +1,107 @@ +# Test 32-bit COMPARE LOGICAL AND BRANCH in cases where the sheer number of +# instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffcc bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 12 bytes if it uses a short +# branch and 14 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x34 - 6) / 12 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks +# can use short branches. 
+# +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: lb [[REG:%r[0-5]]], 1(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 2(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 3(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 4(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 5(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 6(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 7(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# ...main goes here... +# CHECK: lb [[REG:%r[0-5]]], 25(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL:\.L[^ ]*]] +# CHECK: lb [[REG:%r[0-5]]], 26(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 27(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 28(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 29(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 30(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 31(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 32(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffcc + +print 'define void @f1(i8 *%base, i8 *%stop, i32 %limit) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) + print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i) + print ' %%btest%d = icmp ult i32 %%limit, %%bext%d' % (i, i) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print 
'%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) + print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i) + print ' %%atest%d = icmp ult i32 %%limit, %%aext%d' % (i, i) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-10.py b/test/CodeGen/SystemZ/Large/branch-range-10.py new file mode 100644 index 0000000000000..3aeea3ebccdf4 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-10.py @@ -0,0 +1,111 @@ +# Test 64-bit COMPARE LOGICAL AND BRANCH in cases where the sheer number of +# instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffcc bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 12 bytes if it uses a short +# branch and 16 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x34 - 6) / 12 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks +# can use short branches. The conservative algorithm we use makes +# one of the forward branches unnecessarily long, as noted in the +# check output below. 
+# +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: lgb [[REG:%r[0-5]]], 1(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 2(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 3(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 4(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# ...as mentioned above, the next one could be a CLGRJL instead... +# CHECK: lgb [[REG:%r[0-5]]], 5(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 6(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 7(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# ...main goes here... +# CHECK: lgb [[REG:%r[0-5]]], 25(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL:\.L[^ ]*]] +# CHECK: lgb [[REG:%r[0-5]]], 26(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 27(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 28(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 29(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 30(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 31(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 32(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffcc + +print 'define void @f1(i8 *%base, i8 *%stop, i64 %limit) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) + print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i) + print ' %%btest%d = icmp ult i64 
%%limit, %%bext%d' % (i, i) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) + print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i) + print ' %%atest%d = icmp ult i64 %%limit, %%aext%d' % (i, i) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-11.py b/test/CodeGen/SystemZ/Large/branch-range-11.py new file mode 100644 index 0000000000000..034902c4a3420 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-11.py @@ -0,0 +1,127 @@ +# Test 32-bit COMPARE LOGICAL IMMEDIATE AND BRANCH in cases where the sheer +# number of instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffc6 bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 14 bytes if it uses a short +# branch and 20 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x3a - 6) / 14 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x3a / 14 == 4 blocks +# can use short branches. 
The conservative algorithm we use makes +# one of the forward branches unnecessarily long, as noted in the +# check output below. +# +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 50 +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 51 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 52 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 53 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 54 +# CHECK: jgl [[LABEL]] +# ...as mentioned above, the next one could be a CLIJL instead... +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 55 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 56, [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 57, [[LABEL]] +# ...main goes here... 
+# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 100, [[LABEL:\.L[^ ]*]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 101, [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 102, [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 103, [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 104 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 105 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 106 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 107 +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffc6 + +print 'define void @f1(i8 *%base, i32 *%stopa, i32 *%stopb) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bcur%da = load volatile i32 *%%stopa' % i + print ' %%bcur%db = load volatile i32 *%%stopb' % i + print ' %%bsub%d = sub i32 %%bcur%da, %%bcur%db' % (i, i, i) + print ' %%btest%d = icmp ult i32 %%bsub%d, %d' % (i, i, i + 50) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%acur%da = load volatile i32 *%%stopa' % i + print ' %%acur%db = load volatile i32 *%%stopb' % i + print ' %%asub%d = sub i32 %%acur%da, %%acur%db' % (i, i, i) + print ' %%atest%d = icmp ult i32 
%%asub%d, %d' % (i, i, i + 100) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-12.py b/test/CodeGen/SystemZ/Large/branch-range-12.py new file mode 100644 index 0000000000000..007d477e2140d --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-12.py @@ -0,0 +1,127 @@ +# Test 64-bit COMPARE LOGICAL IMMEDIATE AND BRANCH in cases where the sheer +# number of instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffb4 bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 18 bytes if it uses a short +# branch and 24 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x4c - 6) / 18 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x4c / 18 == 4 blocks +# can use short branches. The conservative algorithm we use makes +# one of the forward branches unnecessarily long, as noted in the +# check output below. 
+# +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 50 +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 51 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 52 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 53 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 54 +# CHECK: jgl [[LABEL]] +# ...as mentioned above, the next one could be a CLGIJL instead... +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 55 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 56, [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 57, [[LABEL]] +# ...main goes here... 
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 100, [[LABEL:\.L[^ ]*]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 101, [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 102, [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 103, [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 104 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 105 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 106 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 107 +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffb4 + +print 'define void @f1(i8 *%base, i64 *%stopa, i64 *%stopb) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bcur%da = load volatile i64 *%%stopa' % i + print ' %%bcur%db = load volatile i64 *%%stopb' % i + print ' %%bsub%d = sub i64 %%bcur%da, %%bcur%db' % (i, i, i) + print ' %%btest%d = icmp ult i64 %%bsub%d, %d' % (i, i, i + 50) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%acur%da = load volatile i64 *%%stopa' % i + print ' %%acur%db = load volatile i64 *%%stopb' % i + print ' %%asub%d = sub i64 %%acur%da, %%acur%db' % (i, i, i) + print ' 
%%atest%d = icmp ult i64 %%asub%d, %d' % (i, i, i + 100) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/lit.local.cfg b/test/CodeGen/SystemZ/Large/lit.local.cfg new file mode 100644 index 0000000000000..9a02f849c3478 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/lit.local.cfg @@ -0,0 +1,10 @@ +config.suffixes = ['.py'] + +# These tests take on the order of seconds to run, so skip them unless +# running natively. +if config.root.host_arch not in ['SystemZ']: + config.unsupported = True + +targets = set(config.root.targets_to_build.split()) +if not 'SystemZ' in targets: + config.unsupported = True diff --git a/test/CodeGen/SystemZ/Large/spill-01.py b/test/CodeGen/SystemZ/Large/spill-01.py new file mode 100644 index 0000000000000..3c1d0b611bb46 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/spill-01.py @@ -0,0 +1,40 @@ +# Test cases where MVC is used for spill slots that end up being out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# There are 8 usable call-saved GPRs, two of which are needed for the base +# registers. The first 160 bytes of the frame are needed for the ABI +# call frame, and a further 8 bytes are needed for the emergency spill slot. +# That means we will have at least one out-of-range slot if: +# +# count == (4096 - 168) / 8 + 6 + 1 == 498 +# +# Add in some extra room and check both %r15+4096 (the first out-of-range slot) +# and %r15+4104. 
+# +# CHECK: f1: +# CHECK: lay [[REG:%r[0-5]]], 4096(%r15) +# CHECK: mvc 0(8,[[REG]]), {{[0-9]+}}({{%r[0-9]+}}) +# CHECK: brasl %r14, foo@PLT +# CHECK: lay [[REG:%r[0-5]]], 4096(%r15) +# CHECK: mvc {{[0-9]+}}(8,{{%r[0-9]+}}), 8([[REG]]) +# CHECK: br %r14 +count = 500 + +print 'declare void @foo()' +print '' +print 'define void @f1(i64 *%base0, i64 *%base1) {' + +for i in range(count): + print ' %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2) + print ' %%val%d = load i64 *%%ptr%d' % (i, i) + print '' + +print ' call void @foo()' +print '' + +for i in range(count): + print ' store i64 %%val%d, i64 *%%ptr%d' % (i, i) + +print '' +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/spill-02.py b/test/CodeGen/SystemZ/Large/spill-02.py new file mode 100644 index 0000000000000..0aa43d18054be --- /dev/null +++ b/test/CodeGen/SystemZ/Large/spill-02.py @@ -0,0 +1,73 @@ +# Test cases where we spill from one frame index to another, both of which +# are out of range of MVC, and both of which need emergency spill slots. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# CHECK: f1: +# CHECK: %fallthru +# CHECK-DAG: stg [[REG1:%r[0-9]+]], 8168(%r15) +# CHECK-DAG: stg [[REG2:%r[0-9]+]], 8176(%r15) +# CHECK-DAG: lay [[REG3:%r[0-9]+]], 8192(%r15) +# CHECK-DAG: lay [[REG4:%r[0-9]+]], 4096(%r15) +# CHECK: mvc 0(8,[[REG3]]), 4088([[REG4]]) +# CHECK-DAG: lg [[REG1]], 8168(%r15) +# CHECK-DAG: lg [[REG2]], 8176(%r15) +# CHECK: %skip +# CHECK: br %r14 + +# Arrange for %foo's spill slot to be at 8184(%r15) and the alloca area to be at +# 8192(%r15). The two emergency spill slots live below that, so this requires +# the first 8168 bytes to be used for the call. 160 of these bytes are +# allocated for the ABI frame. There are also 5 argument registers, one of +# which is used as a base pointer. 
+args = (8168 - 160) / 8 + (5 - 1) + +print 'declare i64 *@foo(i64 *%s)' % (', i64' * args) +print 'declare void @bar(i64 *)' +print '' +print 'define i64 @f1(i64 %foo) {' +print 'entry:' + +# Make the allocation big, so that it goes at the top of the frame. +print ' %array = alloca [1000 x i64]' +print ' %area = getelementptr [1000 x i64] *%array, i64 0, i64 0' +print ' %%base = call i64 *@foo(i64 *%%area%s)' % (', i64 0' * args) +print '' + +# Make sure all GPRs are used. One is needed for the stack pointer and +# another for %base, so we need 14 live values. +count = 14 +for i in range(count): + print ' %%ptr%d = getelementptr i64 *%%base, i64 %d' % (i, i / 2) + print ' %%val%d = load volatile i64 *%%ptr%d' % (i, i) + print '' + +# Encourage the register allocator to give preference to these %vals +# by using them several times. +for j in range(4): + for i in range(count): + print ' store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i) + print '' + +# Copy the incoming argument, which we expect to be spilled, to the frame +# index for the alloca area. Also throw in a volatile store, so that this +# block cannot be reordered with the surrounding code. +print ' %cond = icmp eq i64 %val0, %val1' +print ' br i1 %cond, label %skip, label %fallthru' +print '' +print 'fallthru:' +print ' store i64 %foo, i64 *%area' +print ' store volatile i64 %val0, i64 *%ptr0' +print ' br label %skip' +print '' +print 'skip:' + +# Use each %val a few more times to emphasise the point, and to make sure +# that they are live across the store of %foo. +for j in range(4): + for i in range(count): + print ' store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i) + print '' + +print ' call void @bar(i64 *%area)' +print ' ret i64 0' +print '}' diff --git a/test/CodeGen/SystemZ/addr-01.ll b/test/CodeGen/SystemZ/addr-01.ll index c125ffa71a71a..d0960cdb1047a 100644 --- a/test/CodeGen/SystemZ/addr-01.ll +++ b/test/CodeGen/SystemZ/addr-01.ll @@ -5,7 +5,7 @@ ; A simple index address. 
define void @f1(i64 %addr, i64 %index) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lb %r0, 0(%r3,%r2) ; CHECK: br %r14 %add = add i64 %addr, %index @@ -16,7 +16,7 @@ define void @f1(i64 %addr, i64 %index) { ; An address with an index and a displacement (order 1). define void @f2(i64 %addr, i64 %index) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lb %r0, 100(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %addr, %index @@ -28,7 +28,7 @@ define void @f2(i64 %addr, i64 %index) { ; An address with an index and a displacement (order 2). define void @f3(i64 %addr, i64 %index) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lb %r0, 100(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %addr, 100 @@ -40,7 +40,7 @@ define void @f3(i64 %addr, i64 %index) { ; An address with an index and a subtracted displacement (order 1). define void @f4(i64 %addr, i64 %index) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lb %r0, -100(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %addr, %index @@ -52,7 +52,7 @@ define void @f4(i64 %addr, i64 %index) { ; An address with an index and a subtracted displacement (order 2). define void @f5(i64 %addr, i64 %index) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lb %r0, -100(%r3,%r2) ; CHECK: br %r14 %add1 = sub i64 %addr, 100 @@ -64,7 +64,7 @@ define void @f5(i64 %addr, i64 %index) { ; An address with an index and a displacement added using OR. define void @f6(i64 %addr, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: nill %r2, 65528 ; CHECK: lb %r0, 6(%r3,%r2) ; CHECK: br %r14 @@ -78,7 +78,7 @@ define void @f6(i64 %addr, i64 %index) { ; Like f6, but without the masking. This OR doesn't count as a displacement. define void @f7(i64 %addr, i64 %index) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: oill %r2, 6 ; CHECK: lb %r0, 0(%r3,%r2) ; CHECK: br %r14 @@ -92,7 +92,7 @@ define void @f7(i64 %addr, i64 %index) { ; Like f6, but with the OR applied after the index. We don't know anything ; about the alignment of %add here. 
define void @f8(i64 %addr, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: nill %r2, 65528 ; CHECK: agr %r2, %r3 ; CHECK: oill %r2, 6 diff --git a/test/CodeGen/SystemZ/addr-02.ll b/test/CodeGen/SystemZ/addr-02.ll index 6772c1d41800f..56c48794b0721 100644 --- a/test/CodeGen/SystemZ/addr-02.ll +++ b/test/CodeGen/SystemZ/addr-02.ll @@ -6,7 +6,7 @@ ; A simple index address. define void @f1(i64 %addr, i64 %index, i8 **%dst) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lb %r0, 0(%r3,%r2) ; CHECK: br %r14 %add = add i64 %addr, %index @@ -18,7 +18,7 @@ define void @f1(i64 %addr, i64 %index, i8 **%dst) { ; An address with an index and a displacement (order 1). define void @f2(i64 %addr, i64 %index, i8 **%dst) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lb %r0, 100(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %addr, %index @@ -31,7 +31,7 @@ define void @f2(i64 %addr, i64 %index, i8 **%dst) { ; An address with an index and a displacement (order 2). define void @f3(i64 %addr, i64 %index, i8 **%dst) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lb %r0, 100(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %addr, 100 @@ -44,7 +44,7 @@ define void @f3(i64 %addr, i64 %index, i8 **%dst) { ; An address with an index and a subtracted displacement (order 1). define void @f4(i64 %addr, i64 %index, i8 **%dst) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lb %r0, -100(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %addr, %index @@ -57,7 +57,7 @@ define void @f4(i64 %addr, i64 %index, i8 **%dst) { ; An address with an index and a subtracted displacement (order 2). define void @f5(i64 %addr, i64 %index, i8 **%dst) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lb %r0, -100(%r3,%r2) ; CHECK: br %r14 %add1 = sub i64 %addr, 100 @@ -70,7 +70,7 @@ define void @f5(i64 %addr, i64 %index, i8 **%dst) { ; An address with an index and a displacement added using OR. 
define void @f6(i64 %addr, i64 %index, i8 **%dst) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: nill %r2, 65528 ; CHECK: lb %r0, 6(%r3,%r2) ; CHECK: br %r14 @@ -85,7 +85,7 @@ define void @f6(i64 %addr, i64 %index, i8 **%dst) { ; Like f6, but without the masking. This OR doesn't count as a displacement. define void @f7(i64 %addr, i64 %index, i8 **%dst) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: oill %r2, 6 ; CHECK: lb %r0, 0(%r3,%r2) ; CHECK: br %r14 @@ -100,7 +100,7 @@ define void @f7(i64 %addr, i64 %index, i8 **%dst) { ; Like f6, but with the OR applied after the index. We don't know anything ; about the alignment of %add here. define void @f8(i64 %addr, i64 %index, i8 **%dst) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: nill %r2, 65528 ; CHECK: agr %r2, %r3 ; CHECK: oill %r2, 6 diff --git a/test/CodeGen/SystemZ/addr-03.ll b/test/CodeGen/SystemZ/addr-03.ll index dbdb9f15b4f10..1146926a4c2eb 100644 --- a/test/CodeGen/SystemZ/addr-03.ll +++ b/test/CodeGen/SystemZ/addr-03.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lb %r0, 0 ; CHECK: br %r14 %ptr = inttoptr i64 0 to i8 * @@ -12,7 +12,7 @@ define void @f1() { } define void @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lb %r0, -524288 ; CHECK: br %r14 %ptr = inttoptr i64 -524288 to i8 * @@ -21,7 +21,7 @@ define void @f2() { } define void @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: lb %r0, -524289 ; CHECK: br %r14 %ptr = inttoptr i64 -524289 to i8 * @@ -30,7 +30,7 @@ define void @f3() { } define void @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lb %r0, 524287 ; CHECK: br %r14 %ptr = inttoptr i64 524287 to i8 * @@ -39,7 +39,7 @@ define void @f4() { } define void @f5() { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: lb %r0, 524288 ; CHECK: br %r14 %ptr = inttoptr i64 524288 to i8 * diff --git a/test/CodeGen/SystemZ/alias-01.ll b/test/CodeGen/SystemZ/alias-01.ll new file mode 100644 index 
0000000000000..8839aade7a0e3 --- /dev/null +++ b/test/CodeGen/SystemZ/alias-01.ll @@ -0,0 +1,19 @@ +; Test 32-bit ANDs in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check that there are no spills. +define void @f1(<16 x i32> *%src1, <16 x float> *%dest) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r15 +; CHECK: br %r14 + %val = load <16 x i32> *%src1, !tbaa !1 + %add = add <16 x i32> %val, %val + %res = bitcast <16 x i32> %add to <16 x float> + store <16 x float> %res, <16 x float> *%dest, !tbaa !2 + ret void +} + +!0 = metadata !{ metadata !"root" } +!1 = metadata !{ metadata !"set1", metadata !0 } +!2 = metadata !{ metadata !"set2", metadata !0 } diff --git a/test/CodeGen/SystemZ/alloca-01.ll b/test/CodeGen/SystemZ/alloca-01.ll index 1852c91350592..2ddefd70cc9d2 100644 --- a/test/CodeGen/SystemZ/alloca-01.ll +++ b/test/CodeGen/SystemZ/alloca-01.ll @@ -1,8 +1,7 @@ ; Test variable-sized allocas and addresses based on them in cases where ; stack arguments are needed. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK1 -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK2 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-A ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-B ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-C @@ -15,17 +14,6 @@ declare i64 @bar(i8 *%a, i8 *%b, i8 *%c, i8 *%d, i8 *%e, i64 %f, i64 %g) ; There are two stack arguments, so an offset of 160 + 2 * 8 == 176 ; is added to the copy of %r15. define i64 @f1(i64 %length, i64 %index) { -; The full allocation sequence is: -; -; la %r0, 7(%r2) 1 -; nill %r0, 0xfff8 1 -; lgr %r1, %r15 2 -; sgr %r1, %r0 1 2 -; lgr %r15, %r1 2 -; -; The third instruction does not depend on the first two, so check for -; two fully-ordered sequences. 
-; ; FIXME: a better sequence would be: ; ; lgr %r1, %r15 @@ -33,38 +21,34 @@ define i64 @f1(i64 %length, i64 %index) { ; nill %r1, 0xfff8 ; lgr %r15, %r1 ; -; CHECK1: f1: -; CHECK1: la %r0, 7(%r2) -; CHECK1: nill %r0, 65528 -; CHECK1: sgr %r1, %r0 -; CHECK1: lgr %r15, %r1 -; -; CHECK2: f1: -; CHECK2: lgr %r1, %r15 -; CHECK2: sgr %r1, %r0 -; CHECK2: lgr %r15, %r1 +; CHECK-LABEL: f1: +; CHECK-DAG: la [[REG1:%r[0-5]]], 7(%r2) +; CHECK-DAG: nill [[REG1]], 65528 +; CHECK-DAG: lgr [[REG2:%r[0-5]]], %r15 +; CHECK: sgr [[REG2]], [[REG1]] +; CHECK: lgr %r15, [[REG2]] ; -; CHECK-A: f1: +; CHECK-A-LABEL: f1: ; CHECK-A: lgr %r15, %r1 ; CHECK-A: la %r2, 176(%r1) ; -; CHECK-B: f1: +; CHECK-B-LABEL: f1: ; CHECK-B: lgr %r15, %r1 ; CHECK-B: la %r3, 177(%r1) ; -; CHECK-C: f1: +; CHECK-C-LABEL: f1: ; CHECK-C: lgr %r15, %r1 ; CHECK-C: la %r4, 4095({{%r3,%r1|%r1,%r3}}) ; -; CHECK-D: f1: +; CHECK-D-LABEL: f1: ; CHECK-D: lgr %r15, %r1 ; CHECK-D: lay %r5, 4096({{%r3,%r1|%r1,%r3}}) ; -; CHECK-E: f1: +; CHECK-E-LABEL: f1: ; CHECK-E: lgr %r15, %r1 ; CHECK-E: lay %r6, 4271({{%r3,%r1|%r1,%r3}}) ; -; CHECK-FP: f1: +; CHECK-FP-LABEL: f1: ; CHECK-FP: lgr %r11, %r15 ; CHECK-FP: lmg %r6, %r15, 224(%r11) %a = alloca i8, i64 %length diff --git a/test/CodeGen/SystemZ/alloca-02.ll b/test/CodeGen/SystemZ/alloca-02.ll index fbb095f4d12d5..b5787b102358e 100644 --- a/test/CodeGen/SystemZ/alloca-02.ll +++ b/test/CodeGen/SystemZ/alloca-02.ll @@ -9,40 +9,43 @@ declare i64 @bar(i8 *%a) define i64 @f1(i64 %length, i64 %index) { -; CHECK-A: f1: +; CHECK-A-LABEL: f1: ; CHECK-A: lgr %r15, [[ADDR:%r[1-5]]] ; CHECK-A: la %r2, 160([[ADDR]]) ; CHECK-A: mvi 0(%r2), 0 ; -; CHECK-B: f1: +; CHECK-B-LABEL: f1: ; CHECK-B: lgr %r15, [[ADDR:%r[1-5]]] ; CHECK-B: la %r2, 160([[ADDR]]) ; CHECK-B: mvi 4095(%r2), 1 ; -; CHECK-C: f1: +; CHECK-C-LABEL: f1: ; CHECK-C: lgr %r15, [[ADDR:%r[1-5]]] -; CHECK-C: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) -; CHECK-C: mvi 0([[TMP]]), 2 +; CHECK-C-DAG: la %r2, 160([[ADDR]]) +; CHECK-C-DAG: lhi 
[[TMP:%r[0-5]]], 2 +; CHECK-C: stc [[TMP]], 0({{%r3,%r2|%r2,%r3}}) ; -; CHECK-D: f1: +; CHECK-D-LABEL: f1: ; CHECK-D: lgr %r15, [[ADDR:%r[1-5]]] -; CHECK-D: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) -; CHECK-D: mvi 4095([[TMP]]), 3 +; CHECK-D-DAG: la %r2, 160([[ADDR]]) +; CHECK-D-DAG: lhi [[TMP:%r[0-5]]], 3 +; CHECK-D: stc [[TMP]], 4095({{%r3,%r2|%r2,%r3}}) ; -; CHECK-E: f1: +; CHECK-E-LABEL: f1: ; CHECK-E: lgr %r15, [[ADDR:%r[1-5]]] -; CHECK-E: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) -; CHECK-E: mviy 4096([[TMP]]), 4 +; CHECK-E-DAG: la %r2, 160([[ADDR]]) +; CHECK-E-DAG: lhi [[TMP:%r[0-5]]], 4 +; CHECK-E: stcy [[TMP]], 4096({{%r3,%r2|%r2,%r3}}) %a = alloca i8, i64 %length - store i8 0, i8 *%a + store volatile i8 0, i8 *%a %b = getelementptr i8 *%a, i64 4095 - store i8 1, i8 *%b + store volatile i8 1, i8 *%b %c = getelementptr i8 *%a, i64 %index - store i8 2, i8 *%c + store volatile i8 2, i8 *%c %d = getelementptr i8 *%c, i64 4095 - store i8 3, i8 *%d + store volatile i8 3, i8 *%d %e = getelementptr i8 *%d, i64 1 - store i8 4, i8 *%e + store volatile i8 4, i8 *%e %count = call i64 @bar(i8 *%a) %res = add i64 %count, 1 ret i64 %res diff --git a/test/CodeGen/SystemZ/and-01.ll b/test/CodeGen/SystemZ/and-01.ll index 8dd106b7c015a..3b230ba1081f0 100644 --- a/test/CodeGen/SystemZ/and-01.ll +++ b/test/CodeGen/SystemZ/and-01.ll @@ -1,10 +1,13 @@ ; Test 32-bit ANDs in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() ; Check NR. define i32 @f1(i32 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: nr %r2, %r3 ; CHECK: br %r14 %and = and i32 %a, %b @@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) { ; Check the low end of the N range. 
define i32 @f2(i32 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: n %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) { ; Check the high end of the aligned N range. define i32 @f3(i32 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: n %r2, 4092(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1023 @@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) { ; Check the next word up, which should use NY instead of N. define i32 @f4(i32 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ny %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1024 @@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) { ; Check the high end of the aligned NY range. define i32 @f5(i32 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: ny %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f6(i32 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: n %r2, 0(%r3) ; CHECK: br %r14 @@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) { ; Check the high end of the negative aligned NY range. define i32 @f7(i32 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ny %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) { ; Check the low end of the NY range. define i32 @f8(i32 %a, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: ny %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i32 @f9(i32 %a, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: n %r2, 0(%r3) ; CHECK: br %r14 @@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) { ; Check that N allows an index. define i32 @f10(i32 %a, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: n %r2, 4092({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) { ; Check that NY allows an index. define i32 @f11(i32 %a, i64 %src, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: ny %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) { %and = and i32 %a, %b ret i32 %and } + +; Check that ANDs of spilled values can use N rather than NR. +define i32 @f12(i32 *%ptr0) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK: n %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %ret = call i32 @foo() + + %and0 = and i32 %ret, %val0 + %and1 = and i32 %and0, %val1 + %and2 = and i32 %and1, %val2 + %and3 = and i32 %and2, %val3 + %and4 = and i32 %and3, %val4 + %and5 = and i32 %and4, %val5 + %and6 = and i32 %and5, %val6 + %and7 = and i32 %and6, %val7 + %and8 = and i32 %and7, %val8 + %and9 = and i32 %and8, %val9 + + ret i32 %and9 +} diff --git 
a/test/CodeGen/SystemZ/and-02.ll b/test/CodeGen/SystemZ/and-02.ll index a0fff81492ad9..a7f08b7bb7903 100644 --- a/test/CodeGen/SystemZ/and-02.ll +++ b/test/CodeGen/SystemZ/and-02.ll @@ -1,93 +1,226 @@ ; Test 32-bit ANDs in which the second operand is constant. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s -; Check the lowest useful NILF value. +; ANDs with 1 can use NILF. define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: nilf %r2, 1 ; CHECK: br %r14 %and = and i32 %a, 1 ret i32 %and } +; ...but RISBLG is available as a three-address form. +define i32 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: risblg %r2, %r3, 31, 159, 0 +; CHECK: br %r14 + %and = and i32 %b, 1 + ret i32 %and +} + +; ...same for 4. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: risblg %r2, %r3, 29, 157, 0 +; CHECK: br %r14 + %and = and i32 %b, 4 + ret i32 %and +} + +; ANDs with 5 must use NILF. +define i32 @f4(i32 %a) { +; CHECK-LABEL: f4: +; CHECK: nilf %r2, 5 +; CHECK: br %r14 + %and = and i32 %a, 5 + ret i32 %and +} + +; ...a single RISBLG isn't enough. +define i32 @f5(i32 %a, i32 %b) { +; CHECK-LABEL: f5: +; CHECK-NOT: risb +; CHECK: br %r14 + %and = and i32 %b, 5 + ret i32 %and +} + ; Check the highest 16-bit constant that must be handled by NILF. -define i32 @f2(i32 %a) { -; CHECK: f2: +define i32 @f6(i32 %a) { +; CHECK-LABEL: f6: +; CHECK: nilf %r2, 65533 +; CHECK: br %r14 + %and = and i32 %a, 65533 + ret i32 %and +} + +; ...a single RISBLG isn't enough. +define i32 @f7(i32 %a, i32 %b) { +; CHECK-LABEL: f7: +; CHECK-NOT: risb +; CHECK: br %r14 + %and = and i32 %b, 65533 + ret i32 %and +} + +; Check the next highest value, which can use NILF. +define i32 @f8(i32 %a) { +; CHECK-LABEL: f8: ; CHECK: nilf %r2, 65534 ; CHECK: br %r14 %and = and i32 %a, 65534 ret i32 %and } +; ...although the three-address case should use RISBLG. 
+define i32 @f9(i32 %a, i32 %b) { +; CHECK-LABEL: f9: +; CHECK: risblg %r2, %r3, 16, 158, 0 +; CHECK: br %r14 + %and = and i32 %b, 65534 + ret i32 %and +} + ; ANDs of 0xffff are zero extensions from i16. -define i32 @f3(i32 %a) { -; CHECK: f3: -; CHECK: llhr %r2, %r2 +define i32 @f10(i32 %a, i32 %b) { +; CHECK-LABEL: f10: +; CHECK: llhr %r2, %r3 ; CHECK: br %r14 - %and = and i32 %a, 65535 + %and = and i32 %b, 65535 ret i32 %and } ; Check the next value up, which must again use NILF. -define i32 @f4(i32 %a) { -; CHECK: f4: +define i32 @f11(i32 %a) { +; CHECK-LABEL: f11: ; CHECK: nilf %r2, 65536 ; CHECK: br %r14 %and = and i32 %a, 65536 ret i32 %and } -; Check the lowest useful NILH value. (LLHR is used instead of NILH of 0.) -define i32 @f5(i32 %a) { -; CHECK: f5: +; ...but the three-address case can use RISBLG. +define i32 @f12(i32 %a, i32 %b) { +; CHECK-LABEL: f12: +; CHECK: risblg %r2, %r3, 15, 143, 0 +; CHECK: br %r14 + %and = and i32 %b, 65536 + ret i32 %and +} + +; Check the lowest useful NILH value. +define i32 @f13(i32 %a) { +; CHECK-LABEL: f13: ; CHECK: nilh %r2, 1 ; CHECK: br %r14 %and = and i32 %a, 131071 ret i32 %and } +; ...but RISBLG is OK in the three-address case. +define i32 @f14(i32 %a, i32 %b) { +; CHECK-LABEL: f14: +; CHECK: risblg %r2, %r3, 15, 159, 0 +; CHECK: br %r14 + %and = and i32 %b, 131071 + ret i32 %and +} + ; Check the highest useful NILF value. -define i32 @f6(i32 %a) { -; CHECK: f6: +define i32 @f15(i32 %a) { +; CHECK-LABEL: f15: ; CHECK: nilf %r2, 4294901758 ; CHECK: br %r14 %and = and i32 %a, -65538 ret i32 %and } -; Check the highest useful NILH value, which is one up from the above. -define i32 @f7(i32 %a) { -; CHECK: f7: +; Check the next value up, which is the highest useful NILH value. +define i32 @f16(i32 %a) { +; CHECK-LABEL: f16: ; CHECK: nilh %r2, 65534 ; CHECK: br %r14 %and = and i32 %a, -65537 ret i32 %and } -; Check the low end of the NILL range, which is one up again. 
-define i32 @f8(i32 %a) { -; CHECK: f8: +; Check the next value up, which is the first useful NILL value. +define i32 @f17(i32 %a) { +; CHECK-LABEL: f17: ; CHECK: nill %r2, 0 ; CHECK: br %r14 %and = and i32 %a, -65536 ret i32 %and } -; Check the next value up. -define i32 @f9(i32 %a) { -; CHECK: f9: +; ...although the three-address case should use RISBLG. +define i32 @f18(i32 %a, i32 %b) { +; CHECK-LABEL: f18: +; CHECK: risblg %r2, %r3, 0, 143, 0 +; CHECK: br %r14 + %and = and i32 %b, -65536 + ret i32 %and +} + +; Check the next value up again, which can still use NILL. +define i32 @f19(i32 %a) { +; CHECK-LABEL: f19: ; CHECK: nill %r2, 1 ; CHECK: br %r14 %and = and i32 %a, -65535 ret i32 %and } -; Check the highest useful NILL value. -define i32 @f10(i32 %a) { -; CHECK: f10: +; Check the next value up again, which cannot use RISBLG. +define i32 @f20(i32 %a, i32 %b) { +; CHECK-LABEL: f20: +; CHECK-NOT: risb +; CHECK: br %r14 + %and = and i32 %b, -65534 + ret i32 %and +} + +; Check the last useful mask, which can use NILL. +define i32 @f21(i32 %a) { +; CHECK-LABEL: f21: ; CHECK: nill %r2, 65534 ; CHECK: br %r14 %and = and i32 %a, -2 ret i32 %and } + +; ...or RISBLG for the three-address case. +define i32 @f22(i32 %a, i32 %b) { +; CHECK-LABEL: f22: +; CHECK: risblg %r2, %r3, 0, 158, 0 +; CHECK: br %r14 + %and = and i32 %b, -2 + ret i32 %and +} + +; Test that RISBLG can be used when inserting a non-wraparound mask +; into another register. +define i64 @f23(i64 %a, i32 %b) { +; CHECK-LABEL: f23: +; CHECK: risblg %r2, %r3, 30, 158, 0 +; CHECK: br %r14 + %and1 = and i64 %a, -4294967296 + %and2 = and i32 %b, 2 + %ext = zext i32 %and2 to i64 + %or = or i64 %and1, %ext + ret i64 %or +} + +; ...and when inserting a wrap-around mask. 
+define i64 @f24(i64 %a, i32 %b) { +; CHECK-LABEL: f24: +; CHECK: risblg %r2, %r3, 30, 156 +; CHECK: br %r14 + %and1 = and i64 %a, -4294967296 + %and2 = and i32 %b, -5 + %ext = zext i32 %and2 to i64 + %or = or i64 %and1, %ext + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/and-03.ll b/test/CodeGen/SystemZ/and-03.ll index 3fe8d3cf3bf88..a0560d46e4ea4 100644 --- a/test/CodeGen/SystemZ/and-03.ll +++ b/test/CodeGen/SystemZ/and-03.ll @@ -1,10 +1,13 @@ ; Test 64-bit ANDs in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo() ; Check NGR. define i64 @f1(i64 %a, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ngr %r2, %r3 ; CHECK: br %r14 %and = and i64 %a, %b @@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) { ; Check NG with no displacement. define i64 @f2(i64 %a, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ng %r2, 0(%r3) ; CHECK: br %r14 %b = load i64 *%src @@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) { ; Check the high end of the aligned NG range. define i64 @f3(i64 %a, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ng %r2, 524280(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: ng %r2, 0(%r3) ; CHECK: br %r14 @@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) { ; Check the high end of the negative aligned NG range. 
define i64 @f5(i64 %a, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: ng %r2, -8(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) { ; Check the low end of the NG range. define i64 @f6(i64 %a, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ng %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524296 ; CHECK: ng %r2, 0(%r3) ; CHECK: br %r14 @@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) { ; Check that NG allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: ng %r2, 524280({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %and = and i64 %a, %b ret i64 %and } + +; Check that ANDs of spilled values can use NG rather than NGR. 
+define i64 @f9(i64 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: ng %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64 *%ptr0, i64 18 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + %val9 = load i64 *%ptr9 + + %ret = call i64 @foo() + + %and0 = and i64 %ret, %val0 + %and1 = and i64 %and0, %val1 + %and2 = and i64 %and1, %val2 + %and3 = and i64 %and2, %val3 + %and4 = and i64 %and3, %val4 + %and5 = and i64 %and4, %val5 + %and6 = and i64 %and5, %val6 + %and7 = and i64 %and6, %val7 + %and8 = and i64 %and7, %val8 + %and9 = and i64 %and8, %val9 + + ret i64 %and9 +} diff --git a/test/CodeGen/SystemZ/and-04.ll b/test/CodeGen/SystemZ/and-04.ll index 62def60026e19..efb21f36425cd 100644 --- a/test/CodeGen/SystemZ/and-04.ll +++ b/test/CodeGen/SystemZ/and-04.ll @@ -2,13 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; There is no 64-bit AND instruction for a mask of 1. -; FIXME: we ought to be able to require "ngr %r2, %r0", but at the moment, -; two-address optimisations force "ngr %r0, %r2; lgr %r2, %r0" instead. +; Use RISBG for a single bit. define i64 @f1(i64 %a) { -; CHECK: f1: -; CHECK: lghi %r0, 1 -; CHECK: ngr +; CHECK-LABEL: f1: +; CHECK: risbg %r2, %r2, 63, 191, 0 ; CHECK: br %r14 %and = and i64 %a, 1 ret i64 %and @@ -16,165 +13,171 @@ define i64 @f1(i64 %a) { ; Likewise 0xfffe. 
define i64 @f2(i64 %a) { -; CHECK: f2: -; CHECK: llill %r0, 65534 -; CHECK: ngr +; CHECK-LABEL: f2: +; CHECK: risbg %r2, %r2, 48, 190, 0 ; CHECK: br %r14 %and = and i64 %a, 65534 ret i64 %and } ; ...but 0xffff is a 16-bit zero extension. -define i64 @f3(i64 %a) { -; CHECK: f3: -; CHECK: llghr %r2, %r2 +define i64 @f3(i64 %a, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: llghr %r2, %r3 ; CHECK: br %r14 - %and = and i64 %a, 65535 + %and = and i64 %b, 65535 ret i64 %and } -; Check the next value up, which again has no dedicated instruction. +; Check the next value up, which can again use RISBG. define i64 @f4(i64 %a) { -; CHECK: f4: -; CHECK: llilh %r0, 1 -; CHECK: ngr +; CHECK-LABEL: f4: +; CHECK: risbg %r2, %r2, 47, 175, 0 ; CHECK: br %r14 %and = and i64 %a, 65536 ret i64 %and } -; Check 0xfffffffe. +; Check 0xfffffffe, which can also use RISBG. define i64 @f5(i64 %a) { -; CHECK: f5: -; CHECK: lilf %r0, 4294967294 -; CHECK: ngr +; CHECK-LABEL: f5: +; CHECK: risbg %r2, %r2, 32, 190, 0 ; CHECK: br %r14 %and = and i64 %a, 4294967294 ret i64 %and } ; Check the next value up, which is a 32-bit zero extension. -define i64 @f6(i64 %a) { -; CHECK: f6: -; CHECK: llgfr %r2, %r2 +define i64 @f6(i64 %a, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: llgfr %r2, %r3 ; CHECK: br %r14 - %and = and i64 %a, 4294967295 + %and = and i64 %b, 4294967295 ret i64 %and } ; Check the lowest useful NIHF value (0x00000001_ffffffff). define i64 @f7(i64 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: nihf %r2, 1 ; CHECK: br %r14 %and = and i64 %a, 8589934591 ret i64 %and } -; Check the low end of the NIHH range (0x0000ffff_ffffffff). -define i64 @f8(i64 %a) { -; CHECK: f8: -; CHECK: nihh %r2, 0 +; ...but RISBG can be used if a three-address form is useful. +define i64 @f8(i64 %a, i64 %b) { +; CHECK-LABEL: f8: +; CHECK: risbg %r2, %r3, 31, 191, 0 ; CHECK: br %r14 - %and = and i64 %a, 281474976710655 + %and = and i64 %b, 8589934591 ret i64 %and } -; Check the highest useful NIHH value (0xfffeffff_ffffffff). 
+; Check the lowest NIHH value outside the RISBG range (0x0002ffff_ffffffff). define i64 @f9(i64 %a) { -; CHECK: f9: -; CHECK: nihh %r2, 65534 +; CHECK-LABEL: f9: +; CHECK: nihh %r2, 2 ; CHECK: br %r14 - %and = and i64 %a, -281474976710657 + %and = and i64 %a, 844424930131967 ret i64 %and } -; Check the highest useful NIHF value (0xfffefffe_ffffffff). +; Check the highest NIHH value outside the RISBG range (0xfffaffff_ffffffff). define i64 @f10(i64 %a) { -; CHECK: f10: -; CHECK: nihf %r2, 4294901758 +; CHECK-LABEL: f10: +; CHECK: nihh %r2, 65530 ; CHECK: br %r14 - %and = and i64 %a, -281479271677953 + %and = and i64 %a, -1407374883553281 ret i64 %and } -; Check the low end of the NIHL range (0xffff0000_ffffffff). +; Check the highest useful NIHF value (0xfffefffe_ffffffff). define i64 @f11(i64 %a) { -; CHECK: f11: -; CHECK: nihl %r2, 0 +; CHECK-LABEL: f11: +; CHECK: nihf %r2, 4294901758 ; CHECK: br %r14 - %and = and i64 %a, -281470681743361 + %and = and i64 %a, -281479271677953 ret i64 %and } -; Check the highest useful NIHL value (0xfffffffe_ffffffff). +; Check the lowest NIHL value outside the RISBG range (0xffff0002_ffffffff). define i64 @f12(i64 %a) { -; CHECK: f12: -; CHECK: nihl %r2, 65534 +; CHECK-LABEL: f12: +; CHECK: nihl %r2, 2 ; CHECK: br %r14 - %and = and i64 %a, -4294967297 + %and = and i64 %a, -281462091808769 ret i64 %and } -; Check the low end of the NILF range (0xffffffff_00000000). +; Check the highest NIHL value outside the RISBG range (0xfffffffa_ffffffff). define i64 @f13(i64 %a) { -; CHECK: f13: -; CHECK: nilf %r2, 0 +; CHECK-LABEL: f13: +; CHECK: nihl %r2, 65530 ; CHECK: br %r14 - %and = and i64 %a, -4294967296 + %and = and i64 %a, -21474836481 ret i64 %and } -; Check the low end of the NILH range (0xffffffff_0000ffff). +; Check the lowest NILF value outside the RISBG range (0xffffffff_00000002). 
define i64 @f14(i64 %a) { -; CHECK: f14: -; CHECK: nilh %r2, 0 +; CHECK-LABEL: f14: +; CHECK: nilf %r2, 2 ; CHECK: br %r14 - %and = and i64 %a, -4294901761 + %and = and i64 %a, -4294967294 ret i64 %and } -; Check the next value up, which must use NILF. +; Check the lowest NILH value outside the RISBG range (0xffffffff_0002ffff). define i64 @f15(i64 %a) { -; CHECK: f15: -; CHECK: nilf %r2, 65536 +; CHECK-LABEL: f15: +; CHECK: nilh %r2, 2 ; CHECK: br %r14 - %and = and i64 %a, -4294901760 + %and = and i64 %a, -4294770689 ret i64 %and } -; Check the maximum useful NILF value (0xffffffff_fffefffe). +; Check the next value up, which must use NILF. define i64 @f16(i64 %a) { -; CHECK: f16: -; CHECK: nilf %r2, 4294901758 +; CHECK-LABEL: f16: +; CHECK: nilf %r2, 196608 ; CHECK: br %r14 - %and = and i64 %a, -65538 + %and = and i64 %a, -4294770688 ret i64 %and } -; Check the highest useful NILH value, which is one greater than the above. +; Check the highest NILH value outside the RISBG range (0xffffffff_fffaffff). define i64 @f17(i64 %a) { -; CHECK: f17: -; CHECK: nilh %r2, 65534 +; CHECK-LABEL: f17: +; CHECK: nilh %r2, 65530 ; CHECK: br %r14 - %and = and i64 %a, -65537 + %and = and i64 %a, -327681 ret i64 %and } -; Check the low end of the NILL range, which is one greater again. +; Check the maximum useful NILF value (0xffffffff_fffefffe). define i64 @f18(i64 %a) { -; CHECK: f18: -; CHECK: nill %r2, 0 +; CHECK-LABEL: f18: +; CHECK: nilf %r2, 4294901758 ; CHECK: br %r14 - %and = and i64 %a, -65536 + %and = and i64 %a, -65538 ret i64 %and } -; Check the highest useful NILL value. +; Check the lowest NILL value outside the RISBG range (0xffffffff_ffff0002). define i64 @f19(i64 %a) { -; CHECK: f19: -; CHECK: nill %r2, 65534 +; CHECK-LABEL: f19: +; CHECK: nill %r2, 2 +; CHECK: br %r14 + %and = and i64 %a, -65534 + ret i64 %and +} + +; Check the highest NILL value outside the RISBG range. 
+define i64 @f20(i64 %a) { +; CHECK-LABEL: f20: +; CHECK: nill %r2, 65530 ; CHECK: br %r14 - %and = and i64 %a, -2 + %and = and i64 %a, -6 ret i64 %and } diff --git a/test/CodeGen/SystemZ/and-05.ll b/test/CodeGen/SystemZ/and-05.ll index 457391165d5ea..dafd9d5c51b0b 100644 --- a/test/CodeGen/SystemZ/and-05.ll +++ b/test/CodeGen/SystemZ/and-05.ll @@ -4,7 +4,7 @@ ; Check the lowest useful constant, expressed as a signed integer. define void @f1(i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ni 0(%r2), 1 ; CHECK: br %r14 %val = load i8 *%ptr @@ -15,7 +15,7 @@ define void @f1(i8 *%ptr) { ; Check the highest useful constant, expressed as a signed integer. define void @f2(i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -26,7 +26,7 @@ define void @f2(i8 *%ptr) { ; Check the lowest useful constant, expressed as an unsigned integer. define void @f3(i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ni 0(%r2), 1 ; CHECK: br %r14 %val = load i8 *%ptr @@ -37,7 +37,7 @@ define void @f3(i8 *%ptr) { ; Check the highest useful constant, expressed as a unsigned integer. define void @f4(i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -48,7 +48,7 @@ define void @f4(i8 *%ptr) { ; Check the high end of the NI range. define void @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: ni 4095(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4095 @@ -60,7 +60,7 @@ define void @f5(i8 *%src) { ; Check the next byte up, which should use NIY instead of NI. define void @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: niy 4096(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4096 @@ -72,7 +72,7 @@ define void @f6(i8 *%src) { ; Check the high end of the NIY range. 
define void @f7(i8 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: niy 524287(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -85,7 +85,7 @@ define void @f7(i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f8(i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, 524288 ; CHECK: ni 0(%r2), 127 ; CHECK: br %r14 @@ -98,7 +98,7 @@ define void @f8(i8 *%src) { ; Check the high end of the negative NIY range. define void @f9(i8 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: niy -1(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -110,7 +110,7 @@ define void @f9(i8 *%src) { ; Check the low end of the NIY range. define void @f10(i8 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: niy -524288(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -123,7 +123,7 @@ define void @f10(i8 *%src) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f11(i8 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r2, -524289 ; CHECK: ni 0(%r2), 127 ; CHECK: br %r14 @@ -136,7 +136,7 @@ define void @f11(i8 *%src) { ; Check that NI does not allow an index define void @f12(i64 %src, i64 %index) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: agr %r2, %r3 ; CHECK: ni 4095(%r2), 127 ; CHECK: br %r14 @@ -151,7 +151,7 @@ define void @f12(i64 %src, i64 %index) { ; Check that NIY does not allow an index define void @f13(i64 %src, i64 %index) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: agr %r2, %r3 ; CHECK: niy 4096(%r2), 127 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/and-06.ll b/test/CodeGen/SystemZ/and-06.ll index bbb5e7b7b9dda..f796618dd4f45 100644 --- a/test/CodeGen/SystemZ/and-06.ll +++ b/test/CodeGen/SystemZ/and-06.ll @@ -5,7 +5,7 @@ ; Zero extension to 32 bits, negative constant. 
define void @f1(i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -18,7 +18,7 @@ define void @f1(i8 *%ptr) { ; Zero extension to 64 bits, negative constant. define void @f2(i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -31,7 +31,7 @@ define void @f2(i8 *%ptr) { ; Zero extension to 32 bits, positive constant. define void @f3(i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -44,7 +44,7 @@ define void @f3(i8 *%ptr) { ; Zero extension to 64 bits, positive constant. define void @f4(i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -57,7 +57,7 @@ define void @f4(i8 *%ptr) { ; Sign extension to 32 bits, negative constant. define void @f5(i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -70,7 +70,7 @@ define void @f5(i8 *%ptr) { ; Sign extension to 64 bits, negative constant. define void @f6(i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -83,7 +83,7 @@ define void @f6(i8 *%ptr) { ; Sign extension to 32 bits, positive constant. define void @f7(i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -96,7 +96,7 @@ define void @f7(i8 *%ptr) { ; Sign extension to 64 bits, positive constant. define void @f8(i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: ni 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr diff --git a/test/CodeGen/SystemZ/and-07.ll b/test/CodeGen/SystemZ/and-07.ll new file mode 100644 index 0000000000000..ad4c4af59fd71 --- /dev/null +++ b/test/CodeGen/SystemZ/and-07.ll @@ -0,0 +1,39 @@ +; Test the three-operand forms of AND. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check NRK. 
+define i32 @f1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f1: +; CHECK: nrk %r2, %r3, %r4 +; CHECK: br %r14 + %and = and i32 %b, %c + ret i32 %and +} + +; Check that we can still use NR in obvious cases. +define i32 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: nr %r2, %r3 +; CHECK: br %r14 + %and = and i32 %a, %b + ret i32 %and +} + +; Check NGRK. +define i64 @f3(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: f3: +; CHECK: ngrk %r2, %r3, %r4 +; CHECK: br %r14 + %and = and i64 %b, %c + ret i64 %and +} + +; Check that we can still use NGR in obvious cases. +define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: ngr %r2, %r3 +; CHECK: br %r14 + %and = and i64 %a, %b + ret i64 %and +} diff --git a/test/CodeGen/SystemZ/and-08.ll b/test/CodeGen/SystemZ/and-08.ll new file mode 100644 index 0000000000000..7ded115aedfff --- /dev/null +++ b/test/CodeGen/SystemZ/and-08.ll @@ -0,0 +1,378 @@ +; Test memory-to-memory ANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g1src = global i8 1 +@g1dst = global i8 1 +@g2src = global i16 2 +@g2dst = global i16 2 + +; Test the simple i8 case. +define void @f1(i8 *%ptr1) { +; CHECK-LABEL: f1: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %old = load i8 *%ptr2 + %and = and i8 %val, %old + store i8 %and, i8 *%ptr2 + ret void +} + +; ...and again in reverse. +define void @f2(i8 *%ptr1) { +; CHECK-LABEL: f2: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %old = load i8 *%ptr2 + %and = and i8 %old, %val + store i8 %and, i8 *%ptr2 + ret void +} + +; Test i8 cases where one value is zero-extended to 32 bits and the other +; sign-extended. 
+define void @f3(i8 *%ptr1) { +; CHECK-LABEL: f3: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = zext i8 %val to i32 + %old = load i8 *%ptr2 + %extold = sext i8 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; ...and again with the extension types reversed. +define void @f4(i8 *%ptr1) { +; CHECK-LABEL: f4: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = sext i8 %val to i32 + %old = load i8 *%ptr2 + %extold = zext i8 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; ...and again with two sign extensions. +define void @f5(i8 *%ptr1) { +; CHECK-LABEL: f5: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = sext i8 %val to i32 + %old = load i8 *%ptr2 + %extold = sext i8 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; ...and again with two zero extensions. +define void @f6(i8 *%ptr1) { +; CHECK-LABEL: f6: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = zext i8 %val to i32 + %old = load i8 *%ptr2 + %extold = zext i8 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is extended to 64 bits (just one case +; this time). 
+define void @f7(i8 *%ptr1) { +; CHECK-LABEL: f7: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = sext i8 %val to i64 + %old = load i8 *%ptr2 + %extold = zext i8 %old to i64 + %and = and i64 %extval, %extold + %trunc = trunc i64 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test the simple i16 case. +define void @f8(i16 *%ptr1) { +; CHECK-LABEL: f8: +; CHECK: nc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %old = load i16 *%ptr2 + %and = and i16 %val, %old + store i16 %and, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is extended to 32 bits. +define void @f9(i16 *%ptr1) { +; CHECK-LABEL: f9: +; CHECK: nc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %extval = zext i16 %val to i32 + %old = load i16 *%ptr2 + %extold = sext i16 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is extended to 64 bits. +define void @f10(i16 *%ptr1) { +; CHECK-LABEL: f10: +; CHECK: nc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %extval = sext i16 %val to i64 + %old = load i16 *%ptr2 + %extold = zext i16 %old to i64 + %and = and i64 %extval, %extold + %trunc = trunc i64 %and to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test the simple i32 case. +define void @f11(i32 *%ptr1) { +; CHECK-LABEL: f11: +; CHECK: nc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %old = load i32 *%ptr2 + %and = and i32 %old, %val + store i32 %and, i32 *%ptr2 + ret void +} + +; Test i32 cases where the value is extended to 64 bits. 
+define void @f12(i32 *%ptr1) { +; CHECK-LABEL: f12: +; CHECK: nc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %extval = sext i32 %val to i64 + %old = load i32 *%ptr2 + %extold = zext i32 %old to i64 + %and = and i64 %extval, %extold + %trunc = trunc i64 %and to i32 + store i32 %trunc, i32 *%ptr2 + ret void +} + +; Test the i64 case. +define void @f13(i64 *%ptr1) { +; CHECK-LABEL: f13: +; CHECK: nc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; Make sure that we don't use NC if the first load is volatile. +define void @f14(i64 *%ptr1) { +; CHECK-LABEL: f14: +; CHECK-NOT: nc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load volatile i64 *%ptr1 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; ...likewise the second. +define void @f15(i64 *%ptr1) { +; CHECK-LABEL: f15: +; CHECK-NOT: nc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load volatile i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; ...likewise the store. +define void @f16(i64 *%ptr1) { +; CHECK-LABEL: f16: +; CHECK-NOT: nc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store volatile i64 %and, i64 *%ptr2 + ret void +} + +; Test that NC is not used for aligned loads and stores if there is +; no way of telling whether they alias. We don't want to use NC in +; cases where the addresses could be equal. 
+define void @f17(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f17: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; ...but if one of the loads isn't aligned, we can't be sure. +define void @f18(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f18: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; Repeat the previous test with the operands in the opposite order. +define void @f19(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f19: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2 + %old = load i64 *%ptr2 + %and = and i64 %val, %old + store i64 %and, i64 *%ptr2 + ret void +} + +; ...and again with the other operand being unaligned. +define void @f20(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f20: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2, align 2 + %and = and i64 %val, %old + store i64 %and, i64 *%ptr2, align 2 + ret void +} + +; Test a case where there is definite overlap. +define void @f21(i64 %base) { +; CHECK-LABEL: f21: +; CHECK-NOT: nc +; CHECK: br %r14 + %add = add i64 %base, 1 + %ptr1 = inttoptr i64 %base to i64 * + %ptr2 = inttoptr i64 %add to i64 * + %val = load i64 *%ptr1 + %old = load i64 *%ptr2, align 1 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2, align 1 + ret void +} + +; Test that we can use NC for global addresses for i8. +define void @f22(i8 *%ptr) { +; CHECK-LABEL: f22: +; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src +; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst +; CHECK: nc 0(1,[[DST]]), 0([[SRC]]) +; CHECK: br %r14 + %val = load i8 *@g1src + %old = load i8 *@g1dst + %and = and i8 %val, %old + store i8 %and, i8 *@g1dst + ret void +} + +; Test that we use NC even where LHRL and STHRL are available. 
+define void @f23(i16 *%ptr) { +; CHECK-LABEL: f23: +; CHECK-DAG: larl [[SRC:%r[0-5]]], g2src +; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst +; CHECK: nc 0(2,[[DST]]), 0([[SRC]]) +; CHECK: br %r14 + %val = load i16 *@g2src + %old = load i16 *@g2dst + %and = and i16 %val, %old + store i16 %and, i16 *@g2dst + ret void +} + +; Test a case where offset disambiguation is enough. +define void @f24(i64 *%ptr1) { +; CHECK-LABEL: f24: +; CHECK: nc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1, align 1 + %old = load i64 *%ptr2, align 1 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2, align 1 + ret void +} + +; Test a case where TBAA tells us there is no alias. +define void @f25(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f25: +; CHECK: nc 0(8,%r3), 0(%r2) +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2, !tbaa !3 + %old = load i64 *%ptr2, align 2, !tbaa !4 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2, align 2, !tbaa !4 + ret void +} + +; Test a case where TBAA information is present but doesn't help. +define void @f26(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f26: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2, !tbaa !3 + %old = load i64 *%ptr2, align 2, !tbaa !3 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2, align 2, !tbaa !3 + ret void +} + +!0 = metadata !{ metadata !"root" } +!1 = metadata !{ metadata !"set1", metadata !0 } +!2 = metadata !{ metadata !"set2", metadata !0 } +!3 = metadata !{ metadata !1, metadata !1, i64 0} +!4 = metadata !{ metadata !2, metadata !2, i64 0} diff --git a/test/CodeGen/SystemZ/args-01.ll b/test/CodeGen/SystemZ/args-01.ll index a6b80c54db51a..3105503eda53b 100644 --- a/test/CodeGen/SystemZ/args-01.ll +++ b/test/CodeGen/SystemZ/args-01.ll @@ -17,43 +17,42 @@ declare void @bar(i8, i16, i32, i64, float, double, fp128, i64, ; normally use %f0/%f2 as the first available 128-bit pair. This choice ; is hard-coded in the FP128 tests. 
; -; The order of the CHECK-INT loads doesn't matter. The same goes for the -; CHECK_FP128-* stores and the CHECK-STACK stores. It would be OK to reorder +; The order of the CHECK-STACK stores doesn't matter. It would be OK to reorder ; them in response to future code changes. define void @foo() { -; CHECK-INT: foo: -; CHECK-INT: lhi %r2, 1 -; CHECK-INT: lhi %r3, 2 -; CHECK-INT: lhi %r4, 3 -; CHECK-INT: lghi %r5, 4 -; CHECK-INT: la %r6, {{224|240}}(%r15) +; CHECK-INT-LABEL: foo: +; CHECK-INT-DAG: lhi %r2, 1 +; CHECK-INT-DAG: lhi %r3, 2 +; CHECK-INT-DAG: lhi %r4, 3 +; CHECK-INT-DAG: lghi %r5, 4 +; CHECK-INT-DAG: la %r6, {{224|240}}(%r15) ; CHECK-INT: brasl %r14, bar@PLT ; -; CHECK-FLOAT: foo: +; CHECK-FLOAT-LABEL: foo: ; CHECK-FLOAT: lzer %f0 ; CHECK-FLOAT: lcebr %f4, %f0 ; CHECK-FLOAT: brasl %r14, bar@PLT ; -; CHECK-DOUBLE: foo: +; CHECK-DOUBLE-LABEL: foo: ; CHECK-DOUBLE: lzdr %f2 ; CHECK-DOUBLE: lcdbr %f6, %f2 ; CHECK-DOUBLE: brasl %r14, bar@PLT ; -; CHECK-FP128-1: foo: +; CHECK-FP128-1-LABEL: foo: ; CHECK-FP128-1: aghi %r15, -256 ; CHECK-FP128-1: lzxr %f0 -; CHECK-FP128-1: std %f0, 224(%r15) -; CHECK-FP128-1: std %f2, 232(%r15) +; CHECK-FP128-1-DAG: std %f0, 224(%r15) +; CHECK-FP128-1-DAG: std %f2, 232(%r15) ; CHECK-FP128-1: brasl %r14, bar@PLT ; -; CHECK-FP128-2: foo: +; CHECK-FP128-2-LABEL: foo: ; CHECK-FP128-2: aghi %r15, -256 ; CHECK-FP128-2: lzxr %f0 -; CHECK-FP128-2: std %f0, 240(%r15) -; CHECK-FP128-2: std %f2, 248(%r15) +; CHECK-FP128-2-DAG: std %f0, 240(%r15) +; CHECK-FP128-2-DAG: std %f2, 248(%r15) ; CHECK-FP128-2: brasl %r14, bar@PLT ; -; CHECK-STACK: foo: +; CHECK-STACK-LABEL: foo: ; CHECK-STACK: aghi %r15, -256 ; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15) ; CHECK-STACK: stg [[REGISTER]], 216(%r15) diff --git a/test/CodeGen/SystemZ/args-02.ll b/test/CodeGen/SystemZ/args-02.ll index 9ea111c2e021a..8686df88e6793 100644 --- a/test/CodeGen/SystemZ/args-02.ll +++ b/test/CodeGen/SystemZ/args-02.ll @@ -18,43 +18,42 @@ declare void @bar(i8 
signext, i16 signext, i32 signext, i64, float, double, ; normally use %f0/%f2 as the first available 128-bit pair. This choice ; is hard-coded in the FP128 tests. ; -; The order of the CHECK-INT loads doesn't matter. The same goes for the -; CHECK_FP128-* stores and the CHECK-STACK stores. It would be OK to reorder +; The order of the CHECK-STACK stores doesn't matter. It would be OK to reorder ; them in response to future code changes. define void @foo() { -; CHECK-INT: foo: -; CHECK-INT: lghi %r2, -1 -; CHECK-INT: lghi %r3, -2 -; CHECK-INT: lghi %r4, -3 -; CHECK-INT: lghi %r5, -4 -; CHECK-INT: la %r6, {{224|240}}(%r15) +; CHECK-INT-LABEL: foo: +; CHECK-INT-DAG: lghi %r2, -1 +; CHECK-INT-DAG: lghi %r3, -2 +; CHECK-INT-DAG: lghi %r4, -3 +; CHECK-INT-DAG: lghi %r5, -4 +; CHECK-INT-DAG: la %r6, {{224|240}}(%r15) ; CHECK-INT: brasl %r14, bar@PLT ; -; CHECK-FLOAT: foo: +; CHECK-FLOAT-LABEL: foo: ; CHECK-FLOAT: lzer %f0 ; CHECK-FLOAT: lcebr %f4, %f0 ; CHECK-FLOAT: brasl %r14, bar@PLT ; -; CHECK-DOUBLE: foo: +; CHECK-DOUBLE-LABEL: foo: ; CHECK-DOUBLE: lzdr %f2 ; CHECK-DOUBLE: lcdbr %f6, %f2 ; CHECK-DOUBLE: brasl %r14, bar@PLT ; -; CHECK-FP128-1: foo: +; CHECK-FP128-1-LABEL: foo: ; CHECK-FP128-1: aghi %r15, -256 ; CHECK-FP128-1: lzxr %f0 -; CHECK-FP128-1: std %f0, 224(%r15) -; CHECK-FP128-1: std %f2, 232(%r15) +; CHECK-FP128-1-DAG: std %f0, 224(%r15) +; CHECK-FP128-1-DAG: std %f2, 232(%r15) ; CHECK-FP128-1: brasl %r14, bar@PLT ; -; CHECK-FP128-2: foo: +; CHECK-FP128-2-LABEL: foo: ; CHECK-FP128-2: aghi %r15, -256 ; CHECK-FP128-2: lzxr %f0 -; CHECK-FP128-2: std %f0, 240(%r15) -; CHECK-FP128-2: std %f2, 248(%r15) +; CHECK-FP128-2-DAG: std %f0, 240(%r15) +; CHECK-FP128-2-DAG: std %f2, 248(%r15) ; CHECK-FP128-2: brasl %r14, bar@PLT ; -; CHECK-STACK: foo: +; CHECK-STACK-LABEL: foo: ; CHECK-STACK: aghi %r15, -256 ; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15) ; CHECK-STACK: stg [[REGISTER]], 216(%r15) diff --git a/test/CodeGen/SystemZ/args-03.ll 
b/test/CodeGen/SystemZ/args-03.ll index f954d584fcf49..d7d3ea105df7b 100644 --- a/test/CodeGen/SystemZ/args-03.ll +++ b/test/CodeGen/SystemZ/args-03.ll @@ -18,43 +18,42 @@ declare void @bar(i8 zeroext, i16 zeroext, i32 zeroext, i64, float, double, ; normally use %f0/%f2 as the first available 128-bit pair. This choice ; is hard-coded in the FP128 tests. ; -; The order of the CHECK-INT loads doesn't matter. The same goes for the -; CHECK_FP128-* stores and the CHECK-STACK stores. It would be OK to reorder +; The order of the CHECK-STACK stores doesn't matter. It would be OK to reorder ; them in response to future code changes. define void @foo() { -; CHECK-INT: foo: -; CHECK-INT: lghi %r2, 255 -; CHECK-INT: llill %r3, 65534 -; CHECK-INT: llilf %r4, 4294967293 -; CHECK-INT: lghi %r5, -4 -; CHECK-INT: la %r6, {{224|240}}(%r15) +; CHECK-INT-LABEL: foo: +; CHECK-INT-DAG: lghi %r2, 255 +; CHECK-INT-DAG: llill %r3, 65534 +; CHECK-INT-DAG: llilf %r4, 4294967293 +; CHECK-INT-DAG: lghi %r5, -4 +; CHECK-INT-DAG: la %r6, {{224|240}}(%r15) ; CHECK-INT: brasl %r14, bar@PLT ; -; CHECK-FLOAT: foo: +; CHECK-FLOAT-LABEL: foo: ; CHECK-FLOAT: lzer %f0 ; CHECK-FLOAT: lcebr %f4, %f0 ; CHECK-FLOAT: brasl %r14, bar@PLT ; -; CHECK-DOUBLE: foo: +; CHECK-DOUBLE-LABEL: foo: ; CHECK-DOUBLE: lzdr %f2 ; CHECK-DOUBLE: lcdbr %f6, %f2 ; CHECK-DOUBLE: brasl %r14, bar@PLT ; -; CHECK-FP128-1: foo: +; CHECK-FP128-1-LABEL: foo: ; CHECK-FP128-1: aghi %r15, -256 ; CHECK-FP128-1: lzxr %f0 -; CHECK-FP128-1: std %f0, 224(%r15) -; CHECK-FP128-1: std %f2, 232(%r15) +; CHECK-FP128-1-DAG: std %f0, 224(%r15) +; CHECK-FP128-1-DAG: std %f2, 232(%r15) ; CHECK-FP128-1: brasl %r14, bar@PLT ; -; CHECK-FP128-2: foo: +; CHECK-FP128-2-LABEL: foo: ; CHECK-FP128-2: aghi %r15, -256 ; CHECK-FP128-2: lzxr %f0 -; CHECK-FP128-2: std %f0, 240(%r15) -; CHECK-FP128-2: std %f2, 248(%r15) +; CHECK-FP128-2-DAG: std %f0, 240(%r15) +; CHECK-FP128-2-DAG: std %f2, 248(%r15) ; CHECK-FP128-2: brasl %r14, bar@PLT ; -; CHECK-STACK: foo: +; 
CHECK-STACK-LABEL: foo: ; CHECK-STACK: aghi %r15, -256 ; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15) ; CHECK-STACK: stg [[REGISTER]], 216(%r15) diff --git a/test/CodeGen/SystemZ/args-04.ll b/test/CodeGen/SystemZ/args-04.ll index 8340494ff4dcd..1178bb4dafdf7 100644 --- a/test/CodeGen/SystemZ/args-04.ll +++ b/test/CodeGen/SystemZ/args-04.ll @@ -5,7 +5,7 @@ ; Do some arithmetic so that we can see the register being used. define i8 @f1(i8 %r2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ahi %r2, 1 ; CHECK: br %r14 %y = add i8 %r2, 1 @@ -13,21 +13,21 @@ define i8 @f1(i8 %r2) { } define i16 @f2(i8 %r2, i16 %r3) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: {{lr|lgr}} %r2, %r3 ; CHECK: br %r14 ret i16 %r3 } define i32 @f3(i8 %r2, i16 %r3, i32 %r4) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: {{lr|lgr}} %r2, %r4 ; CHECK: br %r14 ret i32 %r4 } define i64 @f4(i8 %r2, i16 %r3, i32 %r4, i64 %r5) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: {{lr|lgr}} %r2, %r5 ; CHECK: br %r14 ret i64 %r5 @@ -35,7 +35,7 @@ define i64 @f4(i8 %r2, i16 %r3, i32 %r4, i64 %r5) { ; Do some arithmetic so that we can see the register being used. define float @f5(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aebr %f0, %f0 ; CHECK: br %r14 %y = fadd float %f0, %f0 @@ -43,7 +43,7 @@ define float @f5(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0) { } define double @f6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 ret double %f2 @@ -54,7 +54,7 @@ define double @f6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2) { ; be copied. 
define void @f7(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, fp128 %r6) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ld %f0, 0(%r6) ; CHECK: ld %f2, 8(%r6) ; CHECK: axbr %f0, %f0 @@ -68,7 +68,7 @@ define void @f7(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, define i64 @f8(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, fp128 %r6, i64 %s1) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: lg %r2, 160(%r15) ; CHECK: br %r14 ret i64 %s1 @@ -76,7 +76,7 @@ define i64 @f8(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, define float @f9(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, fp128 %r6, i64 %s1, float %f4) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ler %f0, %f4 ; CHECK: br %r14 ret float %f4 @@ -84,7 +84,7 @@ define float @f9(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, define double @f10(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, fp128 %r6, i64 %s1, float %f4, double %f6) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ldr %f0, %f6 ; CHECK: br %r14 ret double %f6 @@ -92,7 +92,7 @@ define double @f10(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, define i64 @f11(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: lg %r2, 168(%r15) ; CHECK: br %r14 ret i64 %s2 @@ -102,7 +102,7 @@ define i64 @f11(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, define float @f12(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2, float %s3) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: le %f0, 180(%r15) ; CHECK: br %r14 ret float %s3 @@ -112,7 +112,7 @@ define float @f12(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, define void @f13(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2, float %s3, fp128 %s4) { -; CHECK: f13: +; CHECK-LABEL: f13: 
; CHECK: lg [[REGISTER:%r[1-5]+]], 184(%r15) ; CHECK: ld %f0, 0([[REGISTER]]) ; CHECK: ld %f2, 8([[REGISTER]]) diff --git a/test/CodeGen/SystemZ/args-05.ll b/test/CodeGen/SystemZ/args-05.ll index 9fa193a68e571..8a6ef4c54ffef 100644 --- a/test/CodeGen/SystemZ/args-05.ll +++ b/test/CodeGen/SystemZ/args-05.ll @@ -4,7 +4,7 @@ ; Zero extension of something that is already zero-extended. define void @f1(i32 zeroext %r2, i64 *%r3) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: %r2 ; CHECK: stg %r2, 0(%r3) ; CHECK: br %r14 @@ -15,7 +15,7 @@ define void @f1(i32 zeroext %r2, i64 *%r3) { ; Sign extension of something that is already sign-extended. define void @f2(i32 signext %r2, i64 *%r3) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: %r2 ; CHECK: stg %r2, 0(%r3) ; CHECK: br %r14 @@ -26,7 +26,7 @@ define void @f2(i32 signext %r2, i64 *%r3) { ; Sign extension of something that is already zero-extended. define void @f3(i32 zeroext %r2, i64 *%r3) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lgfr [[REGISTER:%r[0-5]+]], %r2 ; CHECK: stg [[REGISTER]], 0(%r3) ; CHECK: br %r14 @@ -37,7 +37,7 @@ define void @f3(i32 zeroext %r2, i64 *%r3) { ; Zero extension of something that is already sign-extended. 
define void @f4(i32 signext %r2, i64 *%r3) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llgfr [[REGISTER:%r[0-5]+]], %r2 ; CHECK: stg [[REGISTER]], 0(%r3) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/args-06.ll b/test/CodeGen/SystemZ/args-06.ll index b2f8bee2c6b51..644fcec982ef9 100644 --- a/test/CodeGen/SystemZ/args-06.ll +++ b/test/CodeGen/SystemZ/args-06.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define i8 @f1(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ar %r2, %r3 ; CHECK: ar %r2, %r4 ; CHECK: ar %r2, %r5 @@ -22,13 +22,13 @@ define i8 @f1(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g) { } define i16 @f2(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ar %r2, %r3 ; CHECK: ar %r2, %r4 ; CHECK: ar %r2, %r5 ; CHECK: ar %r2, %r6 -; CHECK: lh {{%r[0-5]}}, 166(%r15) -; CHECK: lh {{%r[0-5]}}, 174(%r15) +; CHECK: ah %r2, 166(%r15) +; CHECK: ah %r2, 174(%r15) ; CHECK: br %r14 %addb = add i16 %a, %b %addc = add i16 %addb, %c @@ -40,7 +40,7 @@ define i16 @f2(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g) { } define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ar %r2, %r3 ; CHECK: ar %r2, %r4 ; CHECK: ar %r2, %r5 @@ -58,7 +58,7 @@ define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) { } define i64 @f4(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agr %r2, %r3 ; CHECK: agr %r2, %r4 ; CHECK: agr %r2, %r5 diff --git a/test/CodeGen/SystemZ/asm-01.ll b/test/CodeGen/SystemZ/asm-01.ll index 016d04c614cb5..801378c5fcbde 100644 --- a/test/CodeGen/SystemZ/asm-01.ll +++ b/test/CodeGen/SystemZ/asm-01.ll @@ -5,7 +5,7 @@ ; Check the lowest range. 
define void @f1(i64 %base) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: blah 0(%r2) ; CHECK: br %r14 %addr = inttoptr i64 %base to i64 * @@ -15,7 +15,7 @@ define void @f1(i64 %base) { ; Check the next lowest byte. define void @f2(i64 %base) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: aghi %r2, -1 ; CHECK: blah 0(%r2) ; CHECK: br %r14 @@ -27,7 +27,7 @@ define void @f2(i64 %base) { ; Check the highest range. define void @f3(i64 %base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: blah 4095(%r2) ; CHECK: br %r14 %add = add i64 %base, 4095 @@ -38,7 +38,7 @@ define void @f3(i64 %base) { ; Check the next highest byte. define void @f4(i64 %base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: blah 0(%r2) ; CHECK: br %r14 @@ -50,7 +50,7 @@ define void @f4(i64 %base) { ; Check that indices aren't allowed define void @f5(i64 %base, i64 %index) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agr %r2, %r3 ; CHECK: blah 0(%r2) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-02.ll b/test/CodeGen/SystemZ/asm-02.ll index 12d8bec161cec..ad1e35bb36210 100644 --- a/test/CodeGen/SystemZ/asm-02.ll +++ b/test/CodeGen/SystemZ/asm-02.ll @@ -5,7 +5,7 @@ ; Check the lowest range. define void @f1(i64 %base) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: blah 0(%r2) ; CHECK: br %r14 %addr = inttoptr i64 %base to i64 * @@ -15,7 +15,7 @@ define void @f1(i64 %base) { ; Check the next lowest byte. define void @f2(i64 %base) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: aghi %r2, -1 ; CHECK: blah 0(%r2) ; CHECK: br %r14 @@ -27,7 +27,7 @@ define void @f2(i64 %base) { ; Check the highest range. define void @f3(i64 %base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: blah 4095(%r2) ; CHECK: br %r14 %add = add i64 %base, 4095 @@ -38,7 +38,7 @@ define void @f3(i64 %base) { ; Check the next highest byte. 
define void @f4(i64 %base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: blah 0(%r2) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-03.ll b/test/CodeGen/SystemZ/asm-03.ll index a6f3f2a5cb605..fa3e1a7d01d8b 100644 --- a/test/CodeGen/SystemZ/asm-03.ll +++ b/test/CodeGen/SystemZ/asm-03.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @f1(i64 %base) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: blah 0(%r2) ; CHECK: br %r14 %addr = inttoptr i64 %base to i64 * diff --git a/test/CodeGen/SystemZ/asm-04.ll b/test/CodeGen/SystemZ/asm-04.ll index 0560949eb0696..af7ea9fdef94c 100644 --- a/test/CodeGen/SystemZ/asm-04.ll +++ b/test/CodeGen/SystemZ/asm-04.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @f1(i64 %base) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: blah 0(%r2) ; CHECK: br %r14 %addr = inttoptr i64 %base to i64 * diff --git a/test/CodeGen/SystemZ/asm-05.ll b/test/CodeGen/SystemZ/asm-05.ll index dae90b09eafec..e18cb757b142c 100644 --- a/test/CodeGen/SystemZ/asm-05.ll +++ b/test/CodeGen/SystemZ/asm-05.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @f1(i64 %base) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: blah 0(%r2) ; CHECK: br %r14 %addr = inttoptr i64 %base to i64 * diff --git a/test/CodeGen/SystemZ/asm-06.ll b/test/CodeGen/SystemZ/asm-06.ll index c0e24a3664868..f9848a2df6fcb 100644 --- a/test/CodeGen/SystemZ/asm-06.ll +++ b/test/CodeGen/SystemZ/asm-06.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define i64 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhi %r1, 1 ; CHECK: blah %r2 %r1 ; CHECK: br %r14 @@ -12,7 +12,7 @@ define i64 @f1() { } define i64 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhi %r1, 2 ; CHECK: blah %r2 %r1 ; CHECK: br %r14 @@ -21,7 +21,7 @@ define i64 @f2() { } define i64 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lhi %r1, 3 ; CHECK: blah %r2 %r1 ; 
CHECK: br %r14 @@ -30,7 +30,7 @@ define i64 @f3() { } define i64 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lghi %r1, 4 ; CHECK: blah %r2 %r1 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-07.ll b/test/CodeGen/SystemZ/asm-07.ll index e07286d9a4d60..bf63150cd818b 100644 --- a/test/CodeGen/SystemZ/asm-07.ll +++ b/test/CodeGen/SystemZ/asm-07.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define i64 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhi %r0, 1 ; CHECK: blah %r2 %r0 ; CHECK: br %r14 @@ -12,7 +12,7 @@ define i64 @f1() { } define i64 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhi %r0, 2 ; CHECK: blah %r2 %r0 ; CHECK: br %r14 @@ -21,7 +21,7 @@ define i64 @f2() { } define i64 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lhi %r0, 3 ; CHECK: blah %r2 %r0 ; CHECK: br %r14 @@ -30,7 +30,7 @@ define i64 @f3() { } define i64 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lghi %r0, 4 ; CHECK: blah %r2 %r0 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-08.ll b/test/CodeGen/SystemZ/asm-08.ll index 15abc4d0d2edd..166233752db22 100644 --- a/test/CodeGen/SystemZ/asm-08.ll +++ b/test/CodeGen/SystemZ/asm-08.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define i64 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhi %r0, 1 ; CHECK: blah %r2 %r0 ; CHECK: br %r14 @@ -12,7 +12,7 @@ define i64 @f1() { } define i64 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhi %r0, 2 ; CHECK: blah %r2 %r0 ; CHECK: br %r14 @@ -21,7 +21,7 @@ define i64 @f2() { } define i64 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lhi %r0, 3 ; CHECK: blah %r2 %r0 ; CHECK: br %r14 @@ -30,7 +30,7 @@ define i64 @f3() { } define i64 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lghi %r0, 4 ; CHECK: blah %r2 %r0 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-09.ll b/test/CodeGen/SystemZ/asm-09.ll index 1541170924b7f..5cd7efb94009e 100644 --- a/test/CodeGen/SystemZ/asm-09.ll +++ 
b/test/CodeGen/SystemZ/asm-09.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @f1(i32 *%dst) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhi %r0, 100 ; CHECK: blah %r0 ; CHECK: st %r0, 0(%r2) @@ -14,7 +14,7 @@ define void @f1(i32 *%dst) { } define void @f2(i32 *%dst) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhi %r0, 101 ; CHECK: blah %r0 ; CHECK: st %r0, 0(%r2) @@ -25,7 +25,7 @@ define void @f2(i32 *%dst) { } define void @f3(i32 *%dst) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lhi %r0, 102 ; CHECK: blah %r0 ; CHECK: st %r0, 0(%r2) @@ -37,7 +37,7 @@ define void @f3(i32 *%dst) { ; FIXME: this uses "lhi %r0, 103", but should use "lghi %r0, 103". define void @f4(i32 *%dst) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: blah %r0 ; CHECK: st %r0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +47,7 @@ define void @f4(i32 *%dst) { } define i64 @f5() { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lghi %r2, 104 ; CHECK: blah %r2 ; CHECK: br %r14 @@ -56,7 +56,7 @@ define i64 @f5() { } define i64 @f6() { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lghi %r2, 105 ; CHECK: blah %r2 ; CHECK: br %r14 @@ -65,7 +65,7 @@ define i64 @f6() { } define i64 @f7() { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lghi %r2, 106 ; CHECK: blah %r2 ; CHECK: br %r14 @@ -74,7 +74,7 @@ define i64 @f7() { } define i64 @f8() { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: lghi %r2, 107 ; CHECK: blah %r2 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-10.ll b/test/CodeGen/SystemZ/asm-10.ll index 676c2028b0565..0eccc1972187c 100644 --- a/test/CodeGen/SystemZ/asm-10.ll +++ b/test/CodeGen/SystemZ/asm-10.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define float @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lzer %f1 ; CHECK: blah %f0 %f1 ; CHECK: br %r14 @@ -12,7 +12,7 @@ define float @f1() { } define double @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lzdr %f1 ; CHECK: blah %f0 %f1 ; CHECK: br %r14 @@ -21,7 +21,7 @@ define double @f2() 
{ } define double @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lzxr %f1 ; CHECK: blah %f0 %f1 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-11.ll b/test/CodeGen/SystemZ/asm-11.ll index 9bd8d7c33f01f..8aeb784134a36 100644 --- a/test/CodeGen/SystemZ/asm-11.ll +++ b/test/CodeGen/SystemZ/asm-11.ll @@ -4,7 +4,7 @@ ; Test 1 below the first valid value. define i32 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhi [[REG:%r[0-5]]], -1 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 @@ -14,7 +14,7 @@ define i32 @f1() { ; Test the first valid value. define i32 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: blah %r2 0 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 0) @@ -23,7 +23,7 @@ define i32 @f2() { ; Test the last valid value. define i32 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: blah %r2 255 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 255) @@ -32,7 +32,7 @@ define i32 @f3() { ; Test 1 above the last valid value. define i32 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lhi [[REG:%r[0-5]]], 256 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-12.ll b/test/CodeGen/SystemZ/asm-12.ll index dd920f11fdec7..feecbacf09e8f 100644 --- a/test/CodeGen/SystemZ/asm-12.ll +++ b/test/CodeGen/SystemZ/asm-12.ll @@ -4,7 +4,7 @@ ; Test 1 below the first valid value. define i32 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhi [[REG:%r[0-5]]], -1 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 @@ -14,7 +14,7 @@ define i32 @f1() { ; Test the first valid value. define i32 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: blah %r2 0 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 0) @@ -23,7 +23,7 @@ define i32 @f2() { ; Test the last valid value. define i32 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: blah %r2 4095 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 4095) @@ -32,7 +32,7 @@ define i32 @f3() { ; Test 1 above the last valid value. 
define i32 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lhi [[REG:%r[0-5]]], 4096 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-13.ll b/test/CodeGen/SystemZ/asm-13.ll index af3fdb361533b..b88170079ecca 100644 --- a/test/CodeGen/SystemZ/asm-13.ll +++ b/test/CodeGen/SystemZ/asm-13.ll @@ -4,7 +4,7 @@ ; Test 1 below the first valid value. define i32 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: iilf [[REG:%r[0-5]]], 4294934527 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 @@ -14,7 +14,7 @@ define i32 @f1() { ; Test the first valid value. define i32 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: blah %r2 -32768 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 -32768) @@ -23,7 +23,7 @@ define i32 @f2() { ; Test the last valid value. define i32 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: blah %r2 32767 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 32767) @@ -32,7 +32,7 @@ define i32 @f3() { ; Test 1 above the last valid value. define i32 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llill [[REG:%r[0-5]]], 32768 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-14.ll b/test/CodeGen/SystemZ/asm-14.ll index b6b28d6b32fc0..bcd8b1ebc3df5 100644 --- a/test/CodeGen/SystemZ/asm-14.ll +++ b/test/CodeGen/SystemZ/asm-14.ll @@ -4,7 +4,7 @@ ; Test 1 below the first valid value. define i32 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: iilf [[REG:%r[0-5]]], 4294443007 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 @@ -14,7 +14,7 @@ define i32 @f1() { ; Test the first valid value. define i32 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: blah %r2 -524288 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 -524288) @@ -23,7 +23,7 @@ define i32 @f2() { ; Test the last valid value. 
define i32 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: blah %r2 524287 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 524287) @@ -32,7 +32,7 @@ define i32 @f3() { ; Test 1 above the last valid value. define i32 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llilh [[REG:%r[0-5]]], 8 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-15.ll b/test/CodeGen/SystemZ/asm-15.ll index 4d0e2b4c3be3b..886ee0e897dca 100644 --- a/test/CodeGen/SystemZ/asm-15.ll +++ b/test/CodeGen/SystemZ/asm-15.ll @@ -4,7 +4,7 @@ ; Test 1 below the valid value. define i32 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: iilf [[REG:%r[0-5]]], 2147483646 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 @@ -14,7 +14,7 @@ define i32 @f1() { ; Test the first valid value. define i32 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: blah %r2 2147483647 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483647) @@ -23,7 +23,7 @@ define i32 @f2() { ; Test 1 above the valid value. define i32 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llilh [[REG:%r[0-5]]], 32768 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-16.ll b/test/CodeGen/SystemZ/asm-16.ll index 4d0e2b4c3be3b..886ee0e897dca 100644 --- a/test/CodeGen/SystemZ/asm-16.ll +++ b/test/CodeGen/SystemZ/asm-16.ll @@ -4,7 +4,7 @@ ; Test 1 below the valid value. define i32 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: iilf [[REG:%r[0-5]]], 2147483646 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 @@ -14,7 +14,7 @@ define i32 @f1() { ; Test the first valid value. define i32 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: blah %r2 2147483647 ; CHECK: br %r14 %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483647) @@ -23,7 +23,7 @@ define i32 @f2() { ; Test 1 above the valid value. 
define i32 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llilh [[REG:%r[0-5]]], 32768 ; CHECK: blah %r2 [[REG]] ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/asm-17.ll b/test/CodeGen/SystemZ/asm-17.ll new file mode 100644 index 0000000000000..7bc9da32ea950 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-17.ll @@ -0,0 +1,105 @@ +; Test explicit register names. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test i32 GPRs. +define i32 @f1() { +; CHECK-LABEL: f1: +; CHECK: lhi %r4, 1 +; CHECK: blah %r4 +; CHECK: lr %r2, %r4 +; CHECK: br %r14 + %ret = call i32 asm "blah $0", "={r4},0" (i32 1) + ret i32 %ret +} + +; Test i64 GPRs. +define i64 @f2() { +; CHECK-LABEL: f2: +; CHECK: lghi %r4, 1 +; CHECK: blah %r4 +; CHECK: lgr %r2, %r4 +; CHECK: br %r14 + %ret = call i64 asm "blah $0", "={r4},0" (i64 1) + ret i64 %ret +} + +; Test i32 FPRs. +define float @f3() { +; CHECK-LABEL: f3: +; CHECK: lzer %f4 +; CHECK: blah %f4 +; CHECK: ler %f0, %f4 +; CHECK: br %r14 + %ret = call float asm "blah $0", "={f4},0" (float 0.0) + ret float %ret +} + +; Test i64 FPRs. +define double @f4() { +; CHECK-LABEL: f4: +; CHECK: lzdr %f4 +; CHECK: blah %f4 +; CHECK: ldr %f0, %f4 +; CHECK: br %r14 + %ret = call double asm "blah $0", "={f4},0" (double 0.0) + ret double %ret +} + +; Test i128 FPRs. +define void @f5(fp128 *%dest) { +; CHECK-LABEL: f5: +; CHECK: lzxr %f4 +; CHECK: blah %f4 +; CHECK-DAG: std %f4, 0(%r2) +; CHECK-DAG: std %f6, 8(%r2) +; CHECK: br %r14 + %ret = call fp128 asm "blah $0", "={f4},0" (fp128 0xL00000000000000000000000000000000) + store fp128 %ret, fp128 *%dest + ret void +} + +; Test clobbers of GPRs and CC. +define i32 @f6(i32 %in) { +; CHECK-LABEL: f6: +; CHECK: lr [[REG:%r[01345]]], %r2 +; CHECK: blah +; CHECK: lr %r2, [[REG]] +; CHECK: br %r14 + call void asm sideeffect "blah", "~{r2},~{cc}"() + ret i32 %in +} + +; Test clobbers of FPRs and CC. 
+define float @f7(float %in) { +; CHECK-LABEL: f7: +; CHECK: ler [[REG:%f[1-7]]], %f0 +; CHECK: blah +; CHECK: ler %f0, [[REG]] +; CHECK: br %r14 + call void asm sideeffect "blah", "~{f0},~{cc}"() + ret float %in +} + +; Test that both registers in a GR128 pair get hoisted. +define void @f8(i32 %count) { +; CHECK-LABEL: f8 +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lhi %r1, 1 +; CHECK: %loop +; CHECK-NOT: %r +; CHECK: blah %r0, %r1 +; CHECK: br %r14 +entry: + br label %loop + +loop: + %this = phi i32 [ %count, %entry ], [ %next, %loop ] + call void asm sideeffect "blah $0, $1", "{r0},{r1}" (i32 0, i32 1) + %next = sub i32 %this, 1 + %cmp = icmp ne i32 %next, 0 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/asm-18.ll b/test/CodeGen/SystemZ/asm-18.ll new file mode 100644 index 0000000000000..d60654b7863d5 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-18.ll @@ -0,0 +1,745 @@ +; Test high-word operations, using "h" constraints to force a high +; register and "r" constraints to force a low register. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test loads and stores involving mixtures of high and low registers. 
+define void @f1(i32 *%ptr1, i32 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: lfh [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: l [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: lfh [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: ly [[REG4:%r[0-5]]], 524284(%r3) +; CHECK: blah [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK-DAG: stfh [[REG1]], 0(%r2) +; CHECK-DAG: st [[REG2]], 0(%r3) +; CHECK-DAG: stfh [[REG3]], 4096(%r2) +; CHECK-DAG: sty [[REG4]], 524284(%r3) +; CHECK: br %r14 + %ptr3 = getelementptr i32 *%ptr1, i64 1024 + %ptr4 = getelementptr i32 *%ptr2, i64 131071 + %old1 = load i32 *%ptr1 + %old2 = load i32 *%ptr2 + %old3 = load i32 *%ptr3 + %old4 = load i32 *%ptr4 + %res = call { i32, i32, i32, i32 } asm "blah $0, $1, $2, $3", + "=h,=r,=h,=r,0,1,2,3"(i32 %old1, i32 %old2, i32 %old3, i32 %old4) + %new1 = extractvalue { i32, i32, i32, i32 } %res, 0 + %new2 = extractvalue { i32, i32, i32, i32 } %res, 1 + %new3 = extractvalue { i32, i32, i32, i32 } %res, 2 + %new4 = extractvalue { i32, i32, i32, i32 } %res, 3 + store i32 %new1, i32 *%ptr1 + store i32 %new2, i32 *%ptr2 + store i32 %new3, i32 *%ptr3 + store i32 %new4, i32 *%ptr4 + ret void +} + +; Test moves involving mixtures of high and low registers. +define i32 @f2(i32 %old) { +; CHECK-LABEL: f2: +; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 0, 159, 32 +; CHECK-DAG: lr %r3, %r2 +; CHECK: stepa [[REG1]], %r2, %r3 +; CHECK: risbhg {{%r[0-5]}}, [[REG1]], 0, 159, 0 +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: risblg %r2, [[REG2]], 0, 159, 32 +; CHECK: br %r14 + %tmp = call i32 asm "stepa $1, $2, $3", + "=h,0,{r2},{r3}"(i32 %old, i32 %old, i32 %old) + %new = call i32 asm "stepb $1, $2", "=&h,0,h"(i32 %tmp, i32 %tmp) + ret i32 %new +} + +; Test sign-extending 8-bit loads into mixtures of high and low registers. 
+define void @f3(i8 *%ptr1, i8 *%ptr2) { +; CHECK-LABEL: f3: +; CHECK-DAG: lbh [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: lb [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: lbh [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: lb [[REG4:%r[0-5]]], 524287(%r3) +; CHECK: blah [[REG1]], [[REG2]] +; CHECK: br %r14 + %ptr3 = getelementptr i8 *%ptr1, i64 4096 + %ptr4 = getelementptr i8 *%ptr2, i64 524287 + %val1 = load i8 *%ptr1 + %val2 = load i8 *%ptr2 + %val3 = load i8 *%ptr3 + %val4 = load i8 *%ptr4 + %ext1 = sext i8 %val1 to i32 + %ext2 = sext i8 %val2 to i32 + %ext3 = sext i8 %val3 to i32 + %ext4 = sext i8 %val4 to i32 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4) + ret void +} + +; Test sign-extending 16-bit loads into mixtures of high and low registers. +define void @f4(i16 *%ptr1, i16 *%ptr2) { +; CHECK-LABEL: f4: +; CHECK-DAG: lhh [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: lh [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: lhh [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: lhy [[REG4:%r[0-5]]], 524286(%r3) +; CHECK: blah [[REG1]], [[REG2]] +; CHECK: br %r14 + %ptr3 = getelementptr i16 *%ptr1, i64 2048 + %ptr4 = getelementptr i16 *%ptr2, i64 262143 + %val1 = load i16 *%ptr1 + %val2 = load i16 *%ptr2 + %val3 = load i16 *%ptr3 + %val4 = load i16 *%ptr4 + %ext1 = sext i16 %val1 to i32 + %ext2 = sext i16 %val2 to i32 + %ext3 = sext i16 %val3 to i32 + %ext4 = sext i16 %val4 to i32 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4) + ret void +} + +; Test zero-extending 8-bit loads into mixtures of high and low registers. 
+define void @f5(i8 *%ptr1, i8 *%ptr2) { +; CHECK-LABEL: f5: +; CHECK-DAG: llch [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: llc [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: llch [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: llc [[REG4:%r[0-5]]], 524287(%r3) +; CHECK: blah [[REG1]], [[REG2]] +; CHECK: br %r14 + %ptr3 = getelementptr i8 *%ptr1, i64 4096 + %ptr4 = getelementptr i8 *%ptr2, i64 524287 + %val1 = load i8 *%ptr1 + %val2 = load i8 *%ptr2 + %val3 = load i8 *%ptr3 + %val4 = load i8 *%ptr4 + %ext1 = zext i8 %val1 to i32 + %ext2 = zext i8 %val2 to i32 + %ext3 = zext i8 %val3 to i32 + %ext4 = zext i8 %val4 to i32 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4) + ret void +} + +; Test zero-extending 16-bit loads into mixtures of high and low registers. +define void @f6(i16 *%ptr1, i16 *%ptr2) { +; CHECK-LABEL: f6: +; CHECK-DAG: llhh [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: llh [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: llhh [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: llh [[REG4:%r[0-5]]], 524286(%r3) +; CHECK: blah [[REG1]], [[REG2]] +; CHECK: br %r14 + %ptr3 = getelementptr i16 *%ptr1, i64 2048 + %ptr4 = getelementptr i16 *%ptr2, i64 262143 + %val1 = load i16 *%ptr1 + %val2 = load i16 *%ptr2 + %val3 = load i16 *%ptr3 + %val4 = load i16 *%ptr4 + %ext1 = zext i16 %val1 to i32 + %ext2 = zext i16 %val2 to i32 + %ext3 = zext i16 %val3 to i32 + %ext4 = zext i16 %val4 to i32 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4) + ret void +} + +; Test truncating stores of high and low registers into 8-bit memory. 
+define void @f7(i8 *%ptr1, i8 *%ptr2) { +; CHECK-LABEL: f7: +; CHECK: blah [[REG1:%r[0-5]]], [[REG2:%r[0-5]]] +; CHECK-DAG: stch [[REG1]], 0(%r2) +; CHECK-DAG: stc [[REG2]], 0(%r3) +; CHECK-DAG: stch [[REG1]], 4096(%r2) +; CHECK-DAG: stcy [[REG2]], 524287(%r3) +; CHECK: br %r14 + %res = call { i32, i32 } asm "blah $0, $1", "=h,=r"() + %res1 = extractvalue { i32, i32 } %res, 0 + %res2 = extractvalue { i32, i32 } %res, 1 + %trunc1 = trunc i32 %res1 to i8 + %trunc2 = trunc i32 %res2 to i8 + %ptr3 = getelementptr i8 *%ptr1, i64 4096 + %ptr4 = getelementptr i8 *%ptr2, i64 524287 + store i8 %trunc1, i8 *%ptr1 + store i8 %trunc2, i8 *%ptr2 + store i8 %trunc1, i8 *%ptr3 + store i8 %trunc2, i8 *%ptr4 + ret void +} + +; Test truncating stores of high and low registers into 16-bit memory. +define void @f8(i16 *%ptr1, i16 *%ptr2) { +; CHECK-LABEL: f8: +; CHECK: blah [[REG1:%r[0-5]]], [[REG2:%r[0-5]]] +; CHECK-DAG: sthh [[REG1]], 0(%r2) +; CHECK-DAG: sth [[REG2]], 0(%r3) +; CHECK-DAG: sthh [[REG1]], 4096(%r2) +; CHECK-DAG: sthy [[REG2]], 524286(%r3) +; CHECK: br %r14 + %res = call { i32, i32 } asm "blah $0, $1", "=h,=r"() + %res1 = extractvalue { i32, i32 } %res, 0 + %res2 = extractvalue { i32, i32 } %res, 1 + %trunc1 = trunc i32 %res1 to i16 + %trunc2 = trunc i32 %res2 to i16 + %ptr3 = getelementptr i16 *%ptr1, i64 2048 + %ptr4 = getelementptr i16 *%ptr2, i64 262143 + store i16 %trunc1, i16 *%ptr1 + store i16 %trunc2, i16 *%ptr2 + store i16 %trunc1, i16 *%ptr3 + store i16 %trunc2, i16 *%ptr4 + ret void +} + +; Test zero extensions from 8 bits between mixtures of high and low registers. 
+define i32 @f9(i8 %val1, i8 %val2) { +; CHECK-LABEL: f9: +; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 24, 159, 32 +; CHECK-DAG: llcr [[REG2:%r[0-5]]], %r3 +; CHECK: stepa [[REG1]], [[REG2]] +; CHECK: risbhg [[REG3:%r[0-5]]], [[REG1]], 24, 159, 0 +; CHECK: stepb [[REG3]] +; CHECK: risblg %r2, [[REG3]], 24, 159, 32 +; CHECK: br %r14 + %ext1 = zext i8 %val1 to i32 + %ext2 = zext i8 %val2 to i32 + %val3 = call i8 asm sideeffect "stepa $0, $1", "=h,0,r"(i32 %ext1, i32 %ext2) + %ext3 = zext i8 %val3 to i32 + %val4 = call i8 asm sideeffect "stepb $0", "=h,0"(i32 %ext3) + %ext4 = zext i8 %val4 to i32 + ret i32 %ext4 +} + +; Test zero extensions from 16 bits between mixtures of high and low registers. +define i32 @f10(i16 %val1, i16 %val2) { +; CHECK-LABEL: f10: +; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 16, 159, 32 +; CHECK-DAG: llhr [[REG2:%r[0-5]]], %r3 +; CHECK: stepa [[REG1]], [[REG2]] +; CHECK: risbhg [[REG3:%r[0-5]]], [[REG1]], 16, 159, 0 +; CHECK: stepb [[REG3]] +; CHECK: risblg %r2, [[REG3]], 16, 159, 32 +; CHECK: br %r14 + %ext1 = zext i16 %val1 to i32 + %ext2 = zext i16 %val2 to i32 + %val3 = call i16 asm sideeffect "stepa $0, $1", "=h,0,r"(i32 %ext1, i32 %ext2) + %ext3 = zext i16 %val3 to i32 + %val4 = call i16 asm sideeffect "stepb $0", "=h,0"(i32 %ext3) + %ext4 = zext i16 %val4 to i32 + ret i32 %ext4 +} + +; Test loads of 16-bit constants into mixtures of high and low registers. +define void @f11() { +; CHECK-LABEL: f11: +; CHECK-DAG: iihf [[REG1:%r[0-5]]], 4294934529 +; CHECK-DAG: lhi [[REG2:%r[0-5]]], -32768 +; CHECK-DAG: llihl [[REG3:%r[0-5]]], 32766 +; CHECK-DAG: lhi [[REG4:%r[0-5]]], 32767 +; CHECK: blah [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK: br %r14 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 -32767, i32 -32768, + i32 32766, i32 32767) + ret void +} + +; Test loads of unsigned constants into mixtures of high and low registers. +; For stepc, we expect the h and r operands to be paired by the register +; allocator. 
It doesn't really matter which comes first: LLILL/IIHF would +; be just as good. +define void @f12() { +; CHECK-LABEL: f12: +; CHECK-DAG: llihl [[REG1:%r[0-5]]], 32768 +; CHECK-DAG: llihl [[REG2:%r[0-5]]], 65535 +; CHECK-DAG: llihh [[REG3:%r[0-5]]], 1 +; CHECK-DAG: llihh [[REG4:%r[0-5]]], 65535 +; CHECK: stepa [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK-DAG: llill [[REG1:%r[0-5]]], 32769 +; CHECK-DAG: llill [[REG2:%r[0-5]]], 65534 +; CHECK-DAG: llilh [[REG3:%r[0-5]]], 2 +; CHECK-DAG: llilh [[REG4:%r[0-5]]], 65534 +; CHECK: stepb [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK-DAG: llihl [[REG1:%r[0-5]]], 32770 +; CHECK-DAG: iilf [[REG1]], 65533 +; CHECK-DAG: llihh [[REG2:%r[0-5]]], 4 +; CHECK-DAG: iilf [[REG2]], 524288 +; CHECK: stepc [[REG1]], [[REG1]], [[REG2]], [[REG2]] +; CHECK-DAG: iihf [[REG1:%r[0-5]]], 3294967296 +; CHECK-DAG: iilf [[REG2:%r[0-5]]], 4294567296 +; CHECK-DAG: iihf [[REG3:%r[0-5]]], 1000000000 +; CHECK-DAG: iilf [[REG4:%r[0-5]]], 400000 +; CHECK: stepd [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK: br %r14 + call void asm sideeffect "stepa $0, $1, $2, $3", + "h,h,h,h"(i32 32768, i32 65535, + i32 65536, i32 -65536) + call void asm sideeffect "stepb $0, $1, $2, $3", + "r,r,r,r"(i32 32769, i32 65534, + i32 131072, i32 -131072) + call void asm sideeffect "stepc $0, $1, $2, $3", + "h,r,h,r"(i32 32770, i32 65533, + i32 262144, i32 524288) + call void asm sideeffect "stepd $0, $1, $2, $3", + "h,r,h,r"(i32 -1000000000, i32 -400000, + i32 1000000000, i32 400000) + ret void +} + +; Test selects involving high registers. +define void @f13(i32 %x, i32 %y) { +; CHECK-LABEL: f13: +; CHECK: llihl [[REG:%r[0-5]]], 0 +; CHECK: cije %r2, 0 +; CHECK: iihf [[REG]], 2102030405 +; CHECK: blah [[REG]] +; CHECK: br %r14 + %cmp = icmp eq i32 %x, 0 + %val = select i1 %cmp, i32 0, i32 2102030405 + call void asm sideeffect "blah $0", "h"(i32 %val) + ret void +} + +; Test selects involving low registers. 
+define void @f14(i32 %x, i32 %y) { +; CHECK-LABEL: f14: +; CHECK: lhi [[REG:%r[0-5]]], 0 +; CHECK: cije %r2, 0 +; CHECK: iilf [[REG]], 2102030405 +; CHECK: blah [[REG]] +; CHECK: br %r14 + %cmp = icmp eq i32 %x, 0 + %val = select i1 %cmp, i32 0, i32 2102030405 + call void asm sideeffect "blah $0", "r"(i32 %val) + ret void +} + +; Test immediate insertion involving high registers. +define void @f15() { +; CHECK-LABEL: f15: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: iihh [[REG]], 4660 +; CHECK: stepb [[REG]] +; CHECK: iihl [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %and1 = and i32 %res1, 65535 + %or1 = or i32 %and1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %or1) + %and2 = and i32 %res2, -65536 + %or2 = or i32 %and2, 34661 + call void asm sideeffect "stepc $0", "h"(i32 %or2) + ret void +} + +; Test immediate insertion involving low registers. +define void @f16() { +; CHECK-LABEL: f16: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: iilh [[REG]], 4660 +; CHECK: stepb [[REG]] +; CHECK: iill [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %and1 = and i32 %res1, 65535 + %or1 = or i32 %and1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %or1) + %and2 = and i32 %res2, -65536 + %or2 = or i32 %and2, 34661 + call void asm sideeffect "stepc $0", "r"(i32 %or2) + ret void +} + +; Test immediate OR involving high registers. 
+define void @f17() { +; CHECK-LABEL: f17: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: oihh [[REG]], 4660 +; CHECK: stepb [[REG]] +; CHECK: oihl [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: oihf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %or1 = or i32 %res1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %or1) + %or2 = or i32 %res2, 34661 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %or2) + %or3 = or i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "h"(i32 %or3) + ret void +} + +; Test immediate OR involving low registers. +define void @f18() { +; CHECK-LABEL: f18: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: oilh [[REG]], 4660 +; CHECK: stepb [[REG]] +; CHECK: oill [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: oilf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %or1 = or i32 %res1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %or1) + %or2 = or i32 %res2, 34661 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %or2) + %or3 = or i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "r"(i32 %or3) + ret void +} + +; Test immediate XOR involving high registers. +define void @f19() { +; CHECK-LABEL: f19: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: xihf [[REG]], 305397760 +; CHECK: stepb [[REG]] +; CHECK: xihf [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: xihf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %xor1 = xor i32 %res1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %xor1) + %xor2 = xor i32 %res2, 34661 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %xor2) + %xor3 = xor i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "h"(i32 %xor3) + ret void +} + +; Test immediate XOR involving low registers. 
+define void @f20() { +; CHECK-LABEL: f20: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: xilf [[REG]], 305397760 +; CHECK: stepb [[REG]] +; CHECK: xilf [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: xilf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %xor1 = xor i32 %res1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %xor1) + %xor2 = xor i32 %res2, 34661 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %xor2) + %xor3 = xor i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "r"(i32 %xor3) + ret void +} + +; Test two-operand immediate AND involving high registers. +define void @f21() { +; CHECK-LABEL: f21: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: nihh [[REG]], 4096 +; CHECK: stepb [[REG]] +; CHECK: nihl [[REG]], 57536 +; CHECK: stepc [[REG]] +; CHECK: nihf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %and1 = and i32 %res1, 268500991 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %and1) + %and2 = and i32 %res2, -8000 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %and2) + %and3 = and i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "h"(i32 %and3) + ret void +} + +; Test two-operand immediate AND involving low registers. +define void @f22() { +; CHECK-LABEL: f22: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: nilh [[REG]], 4096 +; CHECK: stepb [[REG]] +; CHECK: nill [[REG]], 57536 +; CHECK: stepc [[REG]] +; CHECK: nilf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %and1 = and i32 %res1, 268500991 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %and1) + %and2 = and i32 %res2, -8000 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %and2) + %and3 = and i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "r"(i32 %and3) + ret void +} + +; Test three-operand immediate AND involving mixtures of low and high registers. 
+define i32 @f23(i32 %old) { +; CHECK-LABEL: f23: +; CHECK-DAG: risblg [[REG1:%r[0-5]]], %r2, 28, 158, 0 +; CHECK-DAG: risbhg [[REG2:%r[0-5]]], %r2, 24, 158, 32 +; CHECK: stepa %r2, [[REG1]], [[REG2]] +; CHECK-DAG: risbhg [[REG3:%r[0-5]]], [[REG2]], 25, 159, 0 +; CHECK-DAG: risblg %r2, [[REG2]], 24, 152, 32 +; CHECK: stepb [[REG2]], [[REG3]], %r2 +; CHECK: br %r14 + %and1 = and i32 %old, 14 + %and2 = and i32 %old, 254 + %res1 = call i32 asm "stepa $1, $2, $3", + "=h,r,r,0"(i32 %old, i32 %and1, i32 %and2) + %and3 = and i32 %res1, 127 + %and4 = and i32 %res1, 128 + %res2 = call i32 asm "stepb $1, $2, $3", + "=r,h,h,0"(i32 %res1, i32 %and3, i32 %and4) + ret i32 %res2 +} + +; Test RISB[LH]G insertions involving mixtures of high and low registers. +define i32 @f24(i32 %old) { +; CHECK-LABEL: f24: +; CHECK-DAG: risblg [[REG1:%r[0-5]]], %r2, 28, 158, 1 +; CHECK-DAG: risbhg [[REG2:%r[0-5]]], %r2, 24, 158, 29 +; CHECK: stepa %r2, [[REG1]], [[REG2]] +; CHECK-DAG: risbhg [[REG3:%r[0-5]]], [[REG2]], 25, 159, 62 +; CHECK-DAG: risblg %r2, [[REG2]], 24, 152, 37 +; CHECK: stepb [[REG2]], [[REG3]], %r2 +; CHECK: br %r14 + %shift1 = shl i32 %old, 1 + %and1 = and i32 %shift1, 14 + %shift2 = lshr i32 %old, 3 + %and2 = and i32 %shift2, 254 + %res1 = call i32 asm "stepa $1, $2, $3", + "=h,r,r,0"(i32 %old, i32 %and1, i32 %and2) + %shift3 = lshr i32 %res1, 2 + %and3 = and i32 %shift3, 127 + %shift4 = shl i32 %res1, 5 + %and4 = and i32 %shift4, 128 + %res2 = call i32 asm "stepb $1, $2, $3", + "=r,h,h,0"(i32 %res1, i32 %and3, i32 %and4) + ret i32 %res2 +} + +; Test TMxx involving mixtures of high and low registers. 
+define i32 @f25(i32 %old) { +; CHECK-LABEL: f25: +; CHECK-DAG: tmll %r2, 1 +; CHECK-DAG: tmlh %r2, 1 +; CHECK: stepa [[REG1:%r[0-5]]], +; CHECK-DAG: tmhl [[REG1]], 1 +; CHECK-DAG: tmhh [[REG1]], 1 +; CHECK: stepb %r2, +; CHECK: br %r14 + %and1 = and i32 %old, 1 + %and2 = and i32 %old, 65536 + %cmp1 = icmp eq i32 %and1, 0 + %cmp2 = icmp eq i32 %and2, 0 + %sel1 = select i1 %cmp1, i32 100, i32 200 + %sel2 = select i1 %cmp2, i32 100, i32 200 + %res1 = call i32 asm "stepa $0, $1, $2", + "=h,r,r"(i32 %sel1, i32 %sel2) + %and3 = and i32 %res1, 1 + %and4 = and i32 %res1, 65536 + %cmp3 = icmp eq i32 %and3, 0 + %cmp4 = icmp eq i32 %and4, 0 + %sel3 = select i1 %cmp3, i32 100, i32 200 + %sel4 = select i1 %cmp4, i32 100, i32 200 + %res2 = call i32 asm "stepb $0, $1, $2", + "=r,h,h"(i32 %sel3, i32 %sel4) + ret i32 %res2 +} + +; Test two-operand halfword immediate addition involving high registers. +define void @f26() { +; CHECK-LABEL: f26: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: aih [[REG]], -32768 +; CHECK: stepb [[REG]] +; CHECK: aih [[REG]], 1 +; CHECK: stepc [[REG]] +; CHECK: aih [[REG]], 32767 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %add1 = add i32 %res1, -32768 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %add1) + %add2 = add i32 %res2, 1 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %add2) + %add3 = add i32 %res3, 32767 + call void asm sideeffect "stepd $0", "h"(i32 %add3) + ret void +} + +; Test two-operand halfword immediate addition involving low registers. 
+define void @f27() { +; CHECK-LABEL: f27: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: ahi [[REG]], -32768 +; CHECK: stepb [[REG]] +; CHECK: ahi [[REG]], 1 +; CHECK: stepc [[REG]] +; CHECK: ahi [[REG]], 32767 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %add1 = add i32 %res1, -32768 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %add1) + %add2 = add i32 %res2, 1 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %add2) + %add3 = add i32 %res3, 32767 + call void asm sideeffect "stepd $0", "r"(i32 %add3) + ret void +} + +; Test three-operand halfword immediate addition involving mixtures of low +; and high registers. RISBHG/AIH would be OK too, instead of AHIK/RISBHG. +define i32 @f28(i32 %old) { +; CHECK-LABEL: f28: +; CHECK: ahik [[REG1:%r[0-5]]], %r2, 14 +; CHECK: stepa %r2, [[REG1]] +; CHECK: ahik [[TMP:%r[0-5]]], [[REG1]], 254 +; CHECK: risbhg [[REG2:%r[0-5]]], [[TMP]], 0, 159, 32 +; CHECK: stepb [[REG1]], [[REG2]] +; CHECK: risbhg [[REG3:%r[0-5]]], [[REG2]], 0, 159, 0 +; CHECK: aih [[REG3]], 127 +; CHECK: stepc [[REG2]], [[REG3]] +; CHECK: risblg %r2, [[REG3]], 0, 159, 32 +; CHECK: ahi %r2, 128 +; CHECK: stepd [[REG3]], %r2 +; CHECK: br %r14 + %add1 = add i32 %old, 14 + %res1 = call i32 asm "stepa $1, $2", + "=r,r,0"(i32 %old, i32 %add1) + %add2 = add i32 %res1, 254 + %res2 = call i32 asm "stepb $1, $2", + "=h,r,0"(i32 %res1, i32 %add2) + %add3 = add i32 %res2, 127 + %res3 = call i32 asm "stepc $1, $2", + "=h,h,0"(i32 %res2, i32 %add3) + %add4 = add i32 %res3, 128 + %res4 = call i32 asm "stepd $1, $2", + "=r,h,0"(i32 %res3, i32 %add4) + ret i32 %res4 +} + +; Test large immediate addition involving high registers. 
+define void @f29() { +; CHECK-LABEL: f29: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: aih [[REG]], -32769 +; CHECK: stepb [[REG]] +; CHECK: aih [[REG]], 32768 +; CHECK: stepc [[REG]] +; CHECK: aih [[REG]], 1000000000 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %add1 = add i32 %res1, -32769 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %add1) + %add2 = add i32 %res2, 32768 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %add2) + %add3 = add i32 %res3, 1000000000 + call void asm sideeffect "stepd $0", "h"(i32 %add3) + ret void +} + +; Test large immediate addition involving low registers. +define void @f30() { +; CHECK-LABEL: f30: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -32769 +; CHECK: stepb [[REG]] +; CHECK: afi [[REG]], 32768 +; CHECK: stepc [[REG]] +; CHECK: afi [[REG]], 1000000000 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %add1 = add i32 %res1, -32769 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %add1) + %add2 = add i32 %res2, 32768 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %add2) + %add3 = add i32 %res3, 1000000000 + call void asm sideeffect "stepd $0", "r"(i32 %add3) + ret void +} + +; Test large immediate comparison involving high registers. +define i32 @f31() { +; CHECK-LABEL: f31: +; CHECK: stepa [[REG1:%r[0-5]]] +; CHECK: cih [[REG1]], 1000000000 +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: clih [[REG2]], 1000000000 +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %cmp1 = icmp sle i32 %res1, 1000000000 + %sel1 = select i1 %cmp1, i32 0, i32 1 + %res2 = call i32 asm "stepb $0, $1", "=h,r"(i32 %sel1) + %cmp2 = icmp ule i32 %res2, 1000000000 + %sel2 = select i1 %cmp2, i32 0, i32 1 + ret i32 %sel2 +} + +; Test large immediate comparison involving low registers. 
+define i32 @f32() { +; CHECK-LABEL: f32: +; CHECK: stepa [[REG1:%r[0-5]]] +; CHECK: cfi [[REG1]], 1000000000 +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: clfi [[REG2]], 1000000000 +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %cmp1 = icmp sle i32 %res1, 1000000000 + %sel1 = select i1 %cmp1, i32 0, i32 1 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %sel1) + %cmp2 = icmp ule i32 %res2, 1000000000 + %sel2 = select i1 %cmp2, i32 0, i32 1 + ret i32 %sel2 +} + +; Test memory comparison involving high registers. +define void @f33(i32 *%ptr1, i32 *%ptr2) { +; CHECK-LABEL: f33: +; CHECK: stepa [[REG1:%r[0-5]]] +; CHECK: chf [[REG1]], 0(%r2) +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: clhf [[REG2]], 0(%r3) +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %load1 = load i32 *%ptr1 + %cmp1 = icmp sle i32 %res1, %load1 + %sel1 = select i1 %cmp1, i32 0, i32 1 + %res2 = call i32 asm "stepb $0, $1", "=h,r"(i32 %sel1) + %load2 = load i32 *%ptr2 + %cmp2 = icmp ule i32 %res2, %load2 + %sel2 = select i1 %cmp2, i32 0, i32 1 + store i32 %sel2, i32 *%ptr1 + ret void +} + +; Test memory comparison involving low registers. 
+define void @f34(i32 *%ptr1, i32 *%ptr2) { +; CHECK-LABEL: f34: +; CHECK: stepa [[REG1:%r[0-5]]] +; CHECK: c [[REG1]], 0(%r2) +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: cl [[REG2]], 0(%r3) +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %load1 = load i32 *%ptr1 + %cmp1 = icmp sle i32 %res1, %load1 + %sel1 = select i1 %cmp1, i32 0, i32 1 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %sel1) + %load2 = load i32 *%ptr2 + %cmp2 = icmp ule i32 %res2, %load2 + %sel2 = select i1 %cmp2, i32 0, i32 1 + store i32 %sel2, i32 *%ptr1 + ret void +} diff --git a/test/CodeGen/SystemZ/atomic-load-01.ll b/test/CodeGen/SystemZ/atomic-load-01.ll index 3e86bcf78ae68..a5bc8833e78ae 100644 --- a/test/CodeGen/SystemZ/atomic-load-01.ll +++ b/test/CodeGen/SystemZ/atomic-load-01.ll @@ -5,7 +5,7 @@ ; This is just a placeholder to make sure that loads are handled. ; The CS-based sequence is probably far too conservative. define i8 @f1(i8 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cs ; CHECK: br %r14 %val = load atomic i8 *%src seq_cst, align 1 diff --git a/test/CodeGen/SystemZ/atomic-load-02.ll b/test/CodeGen/SystemZ/atomic-load-02.ll index d6168cedb8a86..2c9bbdb488a13 100644 --- a/test/CodeGen/SystemZ/atomic-load-02.ll +++ b/test/CodeGen/SystemZ/atomic-load-02.ll @@ -5,7 +5,7 @@ ; This is just a placeholder to make sure that loads are handled. ; The CS-based sequence is probably far too conservative. define i16 @f1(i16 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cs ; CHECK: br %r14 %val = load atomic i16 *%src seq_cst, align 2 diff --git a/test/CodeGen/SystemZ/atomic-load-03.ll b/test/CodeGen/SystemZ/atomic-load-03.ll index fcf0cf3d5a903..1fb41f5e39aab 100644 --- a/test/CodeGen/SystemZ/atomic-load-03.ll +++ b/test/CodeGen/SystemZ/atomic-load-03.ll @@ -5,7 +5,7 @@ ; This is just a placeholder to make sure that loads are handled. ; Using CS is probably too conservative. 
define i32 @f1(i32 %dummy, i32 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhi %r2, 0 ; CHECK: cs %r2, %r2, 0(%r3) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/atomic-load-04.ll b/test/CodeGen/SystemZ/atomic-load-04.ll index 9593d35fef0ce..92cac406e2003 100644 --- a/test/CodeGen/SystemZ/atomic-load-04.ll +++ b/test/CodeGen/SystemZ/atomic-load-04.ll @@ -5,7 +5,7 @@ ; This is just a placeholder to make sure that loads are handled. ; Using CSG is probably too conservative. define i64 @f1(i64 %dummy, i64 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lghi %r2, 0 ; CHECK: csg %r2, %r2, 0(%r3) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/atomic-store-01.ll b/test/CodeGen/SystemZ/atomic-store-01.ll index b316e5cd6309d..53ed24f623cf8 100644 --- a/test/CodeGen/SystemZ/atomic-store-01.ll +++ b/test/CodeGen/SystemZ/atomic-store-01.ll @@ -5,7 +5,7 @@ ; This is just a placeholder to make sure that stores are handled. ; The CS-based sequence is probably far too conservative. define void @f1(i8 %val, i8 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cs ; CHECK: br %r14 store atomic i8 %val, i8 *%src seq_cst, align 1 diff --git a/test/CodeGen/SystemZ/atomic-store-02.ll b/test/CodeGen/SystemZ/atomic-store-02.ll index c761714318830..42d6695b51d91 100644 --- a/test/CodeGen/SystemZ/atomic-store-02.ll +++ b/test/CodeGen/SystemZ/atomic-store-02.ll @@ -5,7 +5,7 @@ ; This is just a placeholder to make sure that stores are handled. ; The CS-based sequence is probably far too conservative. define void @f1(i16 %val, i16 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cs ; CHECK: br %r14 store atomic i16 %val, i16 *%src seq_cst, align 2 diff --git a/test/CodeGen/SystemZ/atomic-store-03.ll b/test/CodeGen/SystemZ/atomic-store-03.ll index 6e2996313db68..846c86fd3662c 100644 --- a/test/CodeGen/SystemZ/atomic-store-03.ll +++ b/test/CodeGen/SystemZ/atomic-store-03.ll @@ -5,11 +5,11 @@ ; This is just a placeholder to make sure that stores are handled. 
; Using CS is probably too conservative. define void @f1(i32 %val, i32 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r0, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: cs %r0, %r2, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 store atomic i32 %val, i32 *%src seq_cst, align 4 ret void diff --git a/test/CodeGen/SystemZ/atomic-store-04.ll b/test/CodeGen/SystemZ/atomic-store-04.ll index 7a611c8cf0815..24615b115658f 100644 --- a/test/CodeGen/SystemZ/atomic-store-04.ll +++ b/test/CodeGen/SystemZ/atomic-store-04.ll @@ -5,11 +5,11 @@ ; This is just a placeholder to make sure that stores are handled. ; Using CS is probably too conservative. define void @f1(i64 %val, i64 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r0, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: csg %r0, %r2, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 store atomic i64 %val, i64 *%src seq_cst, align 8 ret void diff --git a/test/CodeGen/SystemZ/atomicrmw-add-01.ll b/test/CodeGen/SystemZ/atomicrmw-add-01.ll index 2a84857f836df..25f71f31ef1b1 100644 --- a/test/CodeGen/SystemZ/atomicrmw-add-01.ll +++ b/test/CodeGen/SystemZ/atomicrmw-add-01.ll @@ -13,7 +13,7 @@ ; before being used. This shift is independent of the other loop prologue ; instructions. 
define i8 @f1(i8 *%src, i8 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK: ar [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: ar {{%r[0-9]+}}, %r3 @@ -47,7 +47,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; Check the minimum signed value. We add 0x80000000 to the rotated word. define i8 @f2(i8 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -56,11 +56,11 @@ define i8 @f2(i8 *%src) { ; CHECK: afi [[ROT]], -2147483648 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -68,7 +68,7 @@ define i8 @f2(i8 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i8 *%src, i8 -128 seq_cst ret i8 %res @@ -76,13 +76,13 @@ define i8 @f2(i8 *%src) { ; Check addition of -1. We add 0xff000000 to the rotated word. 
define i8 @f3(i8 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: afi [[ROT]], -16777216 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i8 *%src, i8 -1 seq_cst ret i8 %res @@ -90,13 +90,13 @@ define i8 @f3(i8 *%src) { ; Check addition of 1. We add 0x01000000 to the rotated word. define i8 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: afi [[ROT]], 16777216 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i8 *%src, i8 1 seq_cst ret i8 %res @@ -104,13 +104,13 @@ define i8 @f4(i8 *%src) { ; Check the maximum signed value. We add 0x7f000000 to the rotated word. define i8 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: afi [[ROT]], 2130706432 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i8 *%src, i8 127 seq_cst ret i8 %res @@ -119,13 +119,13 @@ define i8 @f5(i8 *%src) { ; Check addition of a large unsigned value. We add 0xfe000000 to the ; rotated word, expressed as a negative AFI operand. define i8 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: afi [[ROT]], -33554432 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i8 *%src, i8 254 seq_cst ret i8 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-add-02.ll b/test/CodeGen/SystemZ/atomicrmw-add-02.ll index 3dd482dd323db..cd4e4784c372e 100644 --- a/test/CodeGen/SystemZ/atomicrmw-add-02.ll +++ b/test/CodeGen/SystemZ/atomicrmw-add-02.ll @@ -13,7 +13,7 @@ ; before being used. This shift is independent of the other loop prologue ; instructions. 
define i16 @f1(i16 *%src, i16 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK: ar [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: ar {{%r[0-9]+}}, %r3 @@ -47,7 +47,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; Check the minimum signed value. We add 0x80000000 to the rotated word. define i16 @f2(i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -56,11 +56,11 @@ define i16 @f2(i16 *%src) { ; CHECK: afi [[ROT]], -2147483648 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -68,7 +68,7 @@ define i16 @f2(i16 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i16 *%src, i16 -32768 seq_cst ret i16 %res @@ -76,13 +76,13 @@ define i16 @f2(i16 *%src) { ; Check addition of -1. We add 0xffff0000 to the rotated word. 
define i16 @f3(i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: afi [[ROT]], -65536 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i16 *%src, i16 -1 seq_cst ret i16 %res @@ -90,13 +90,13 @@ define i16 @f3(i16 *%src) { ; Check addition of 1. We add 0x00010000 to the rotated word. define i16 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: afi [[ROT]], 65536 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i16 *%src, i16 1 seq_cst ret i16 %res @@ -104,13 +104,13 @@ define i16 @f4(i16 *%src) { ; Check the maximum signed value. We add 0x7fff0000 to the rotated word. define i16 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: afi [[ROT]], 2147418112 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i16 *%src, i16 32767 seq_cst ret i16 %res @@ -119,13 +119,13 @@ define i16 @f5(i16 *%src) { ; Check addition of a large unsigned value. We add 0xfffe0000 to the ; rotated word, expressed as a negative AFI operand. define i16 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: afi [[ROT]], -131072 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw add i16 *%src, i16 65534 seq_cst ret i16 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-add-03.ll b/test/CodeGen/SystemZ/atomicrmw-add-03.ll index 01eb8e0d7464c..a81af72d1ed95 100644 --- a/test/CodeGen/SystemZ/atomicrmw-add-03.ll +++ b/test/CodeGen/SystemZ/atomicrmw-add-03.ll @@ -1,16 +1,16 @@ ; Test 32-bit atomic additions. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check addition of a variable. define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lr %r0, %r2 ; CHECK: ar %r0, %r4 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw add i32 *%src, i32 %b seq_cst ret i32 %res @@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; Check addition of 1, which can use AHI. define i32 @f2(i32 %dummy, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lr %r0, %r2 ; CHECK: ahi %r0, 1 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw add i32 *%src, i32 1 seq_cst ret i32 %res @@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) { ; Check the high end of the AHI range. define i32 @f3(i32 %dummy, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ahi %r0, 32767 ; CHECK: br %r14 %res = atomicrmw add i32 *%src, i32 32767 seq_cst @@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) { ; Check the next value up, which must use AFI. define i32 @f4(i32 %dummy, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: afi %r0, 32768 ; CHECK: br %r14 %res = atomicrmw add i32 *%src, i32 32768 seq_cst @@ -50,7 +50,7 @@ define i32 @f4(i32 %dummy, i32 *%src) { ; Check the high end of the AFI range. define i32 @f5(i32 %dummy, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: afi %r0, 2147483647 ; CHECK: br %r14 %res = atomicrmw add i32 *%src, i32 2147483647 seq_cst @@ -59,7 +59,7 @@ define i32 @f5(i32 %dummy, i32 *%src) { ; Check the next value up, which gets treated as a negative operand. 
define i32 @f6(i32 %dummy, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: afi %r0, -2147483648 ; CHECK: br %r14 %res = atomicrmw add i32 *%src, i32 2147483648 seq_cst @@ -68,7 +68,7 @@ define i32 @f6(i32 %dummy, i32 *%src) { ; Check addition of -1, which can use AHI. define i32 @f7(i32 %dummy, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ahi %r0, -1 ; CHECK: br %r14 %res = atomicrmw add i32 *%src, i32 -1 seq_cst @@ -77,7 +77,7 @@ define i32 @f7(i32 %dummy, i32 *%src) { ; Check the low end of the AHI range. define i32 @f8(i32 %dummy, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: ahi %r0, -32768 ; CHECK: br %r14 %res = atomicrmw add i32 *%src, i32 -32768 seq_cst @@ -86,7 +86,7 @@ define i32 @f8(i32 %dummy, i32 *%src) { ; Check the next value down, which must use AFI instead. define i32 @f9(i32 %dummy, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: afi %r0, -32769 ; CHECK: br %r14 %res = atomicrmw add i32 *%src, i32 -32769 seq_cst diff --git a/test/CodeGen/SystemZ/atomicrmw-add-04.ll b/test/CodeGen/SystemZ/atomicrmw-add-04.ll index 6b1d20bd080e9..e7905491f2e0e 100644 --- a/test/CodeGen/SystemZ/atomicrmw-add-04.ll +++ b/test/CodeGen/SystemZ/atomicrmw-add-04.ll @@ -1,16 +1,16 @@ ; Test 64-bit atomic additions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check addition of a variable. define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: agr %r0, %r4 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 %b seq_cst ret i64 %res @@ -18,13 +18,13 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { ; Check addition of 1, which can use AGHI. 
define i64 @f2(i64 %dummy, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: aghi %r0, 1 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 1 seq_cst ret i64 %res @@ -32,7 +32,7 @@ define i64 @f2(i64 %dummy, i64 *%src) { ; Check the high end of the AGHI range. define i64 @f3(i64 %dummy, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: aghi %r0, 32767 ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 32767 seq_cst @@ -41,7 +41,7 @@ define i64 @f3(i64 %dummy, i64 *%src) { ; Check the next value up, which must use AGFI. define i64 @f4(i64 %dummy, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r0, 32768 ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 32768 seq_cst @@ -50,7 +50,7 @@ define i64 @f4(i64 %dummy, i64 *%src) { ; Check the high end of the AGFI range. define i64 @f5(i64 %dummy, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r0, 2147483647 ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 2147483647 seq_cst @@ -59,7 +59,7 @@ define i64 @f5(i64 %dummy, i64 *%src) { ; Check the next value up, which must use a register addition. define i64 @f6(i64 %dummy, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agr ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 2147483648 seq_cst @@ -68,7 +68,7 @@ define i64 @f6(i64 %dummy, i64 *%src) { ; Check addition of -1, which can use AGHI. define i64 @f7(i64 %dummy, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: aghi %r0, -1 ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 -1 seq_cst @@ -77,7 +77,7 @@ define i64 @f7(i64 %dummy, i64 *%src) { ; Check the low end of the AGHI range. 
define i64 @f8(i64 %dummy, i64 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: aghi %r0, -32768 ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 -32768 seq_cst @@ -86,7 +86,7 @@ define i64 @f8(i64 %dummy, i64 *%src) { ; Check the next value down, which must use AGFI instead. define i64 @f9(i64 %dummy, i64 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r0, -32769 ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 -32769 seq_cst @@ -95,7 +95,7 @@ define i64 @f9(i64 %dummy, i64 *%src) { ; Check the low end of the AGFI range. define i64 @f10(i64 %dummy, i64 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r0, -2147483648 ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 -2147483648 seq_cst @@ -104,7 +104,7 @@ define i64 @f10(i64 %dummy, i64 *%src) { ; Check the next value down, which must use a register addition. define i64 @f11(i64 %dummy, i64 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agr ; CHECK: br %r14 %res = atomicrmw add i64 *%src, i64 -2147483649 seq_cst diff --git a/test/CodeGen/SystemZ/atomicrmw-and-01.ll b/test/CodeGen/SystemZ/atomicrmw-and-01.ll index ebbce8e7872b9..6d2f541c3a353 100644 --- a/test/CodeGen/SystemZ/atomicrmw-and-01.ll +++ b/test/CodeGen/SystemZ/atomicrmw-and-01.ll @@ -13,7 +13,7 @@ ; before being used, and that the low bits are set to 1. This sequence is ; independent of the other loop prologue instructions. 
define i8 @f1(i8 *%src, i8 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK: nr [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: oilf %r3, 16777215 ; CHECK-SHIFT2: rll @@ -48,7 +48,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; Check the minimum signed value. We AND the rotated word with 0x80ffffff. define i8 @f2(i8 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -57,11 +57,11 @@ define i8 @f2(i8 *%src) { ; CHECK: nilh [[ROT]], 33023 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -69,7 +69,7 @@ define i8 @f2(i8 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i8 *%src, i8 -128 seq_cst ret i8 %res @@ -77,13 +77,13 @@ define i8 @f2(i8 *%src) { ; Check ANDs of -2 (-1 isn't useful). We AND the rotated word with 0xfeffffff. 
define i8 @f3(i8 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: nilh [[ROT]], 65279 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i8 *%src, i8 -2 seq_cst ret i8 %res @@ -91,13 +91,13 @@ define i8 @f3(i8 *%src) { ; Check ANDs of 1. We AND the rotated word with 0x01ffffff. define i8 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: nilh [[ROT]], 511 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i8 *%src, i8 1 seq_cst ret i8 %res @@ -105,13 +105,13 @@ define i8 @f4(i8 *%src) { ; Check the maximum signed value. We AND the rotated word with 0x7fffffff. define i8 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: nilh [[ROT]], 32767 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i8 *%src, i8 127 seq_cst ret i8 %res @@ -120,13 +120,13 @@ define i8 @f5(i8 *%src) { ; Check ANDs of a large unsigned value. We AND the rotated word with ; 0xfdffffff. define i8 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: nilh [[ROT]], 65023 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i8 *%src, i8 253 seq_cst ret i8 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-and-02.ll b/test/CodeGen/SystemZ/atomicrmw-and-02.ll index b63ca4ab44073..572b22484b28b 100644 --- a/test/CodeGen/SystemZ/atomicrmw-and-02.ll +++ b/test/CodeGen/SystemZ/atomicrmw-and-02.ll @@ -13,7 +13,7 @@ ; before being used, and that the low bits are set to 1. This sequence is ; independent of the other loop prologue instructions. 
define i16 @f1(i16 *%src, i16 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK: nr [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: oill %r3, 65535 ; CHECK-SHIFT2: rll @@ -48,7 +48,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; Check the minimum signed value. We AND the rotated word with 0x8000ffff. define i16 @f2(i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -57,11 +57,11 @@ define i16 @f2(i16 *%src) { ; CHECK: nilh [[ROT]], 32768 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -69,7 +69,7 @@ define i16 @f2(i16 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i16 *%src, i16 -32768 seq_cst ret i16 %res @@ -77,13 +77,13 @@ define i16 @f2(i16 *%src) { ; Check ANDs of -2 (-1 isn't useful). We AND the rotated word with 0xfffeffff. 
define i16 @f3(i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: nilh [[ROT]], 65534 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i16 *%src, i16 -2 seq_cst ret i16 %res @@ -91,13 +91,13 @@ define i16 @f3(i16 *%src) { ; Check ANDs of 1. We AND the rotated word with 0x0001ffff. define i16 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: nilh [[ROT]], 1 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i16 *%src, i16 1 seq_cst ret i16 %res @@ -105,13 +105,13 @@ define i16 @f4(i16 *%src) { ; Check the maximum signed value. We AND the rotated word with 0x7fffffff. define i16 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: nilh [[ROT]], 32767 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i16 *%src, i16 32767 seq_cst ret i16 %res @@ -120,13 +120,13 @@ define i16 @f5(i16 *%src) { ; Check ANDs of a large unsigned value. We AND the rotated word with ; 0xfffdffff. define i16 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: nilh [[ROT]], 65533 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw and i16 *%src, i16 65533 seq_cst ret i16 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-and-03.ll b/test/CodeGen/SystemZ/atomicrmw-and-03.ll index ec69edcf1a479..8d813a1402493 100644 --- a/test/CodeGen/SystemZ/atomicrmw-and-03.ll +++ b/test/CodeGen/SystemZ/atomicrmw-and-03.ll @@ -1,16 +1,16 @@ ; Test 32-bit atomic ANDs. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check ANDs of a variable. define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lr %r0, %r2 ; CHECK: nr %r0, %r4 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw and i32 *%src, i32 %b seq_cst ret i32 %res @@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; Check ANDs of 1. define i32 @f2(i32 %dummy, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lr %r0, %r2 ; CHECK: nilf %r0, 1 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw and i32 *%src, i32 1 seq_cst ret i32 %res @@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) { ; Check ANDs of the low end of the NILH range. define i32 @f3(i32 %dummy, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: nilh %r0, 0 ; CHECK: br %r14 %res = atomicrmw and i32 *%src, i32 65535 seq_cst @@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) { ; Check the next value up, which must use NILF. define i32 @f4(i32 %dummy, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: nilf %r0, 65536 ; CHECK: br %r14 %res = atomicrmw and i32 *%src, i32 65536 seq_cst @@ -50,7 +50,7 @@ define i32 @f4(i32 %dummy, i32 *%src) { ; Check the largest useful NILL value. define i32 @f5(i32 %dummy, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: nill %r0, 65534 ; CHECK: br %r14 %res = atomicrmw and i32 *%src, i32 -2 seq_cst @@ -59,7 +59,7 @@ define i32 @f5(i32 %dummy, i32 *%src) { ; Check the low end of the NILL range. 
define i32 @f6(i32 %dummy, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: nill %r0, 0 ; CHECK: br %r14 %res = atomicrmw and i32 *%src, i32 -65536 seq_cst @@ -68,7 +68,7 @@ define i32 @f6(i32 %dummy, i32 *%src) { ; Check the largest useful NILH value, which is one less than the above. define i32 @f7(i32 %dummy, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: nilh %r0, 65534 ; CHECK: br %r14 %res = atomicrmw and i32 *%src, i32 -65537 seq_cst @@ -77,7 +77,7 @@ define i32 @f7(i32 %dummy, i32 *%src) { ; Check the highest useful NILF value, which is one less than the above. define i32 @f8(i32 %dummy, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: nilf %r0, 4294901758 ; CHECK: br %r14 %res = atomicrmw and i32 *%src, i32 -65538 seq_cst diff --git a/test/CodeGen/SystemZ/atomicrmw-and-04.ll b/test/CodeGen/SystemZ/atomicrmw-and-04.ll index 71f29baa0e6fe..89899a6a03af4 100644 --- a/test/CodeGen/SystemZ/atomicrmw-and-04.ll +++ b/test/CodeGen/SystemZ/atomicrmw-and-04.ll @@ -1,156 +1,170 @@ ; Test 64-bit atomic ANDs. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check ANDs of a variable. define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: ngr %r0, %r4 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw and i64 *%src, i64 %b seq_cst ret i64 %res } -; Check ANDs of 1, which must be done using a register. +; Check ANDs of 1, which are done using a register. (We could use RISBG +; instead, but that isn't implemented yet.) define i64 @f2(i64 %dummy, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ngr ; CHECK: br %r14 %res = atomicrmw and i64 *%src, i64 1 seq_cst ret i64 %res } -; Check the low end of the NIHF range. +; Check the equivalent of NIHF with 1, which can use RISBG instead. 
define i64 @f3(i64 %dummy, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: lgr %r0, %r2 -; CHECK: nihf %r0, 0 +; CHECK: risbg %r0, %r2, 31, 191, 0 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 4294967295 seq_cst + %res = atomicrmw and i64 *%src, i64 8589934591 seq_cst ret i64 %res } -; Check the next value up, which must use a register. +; Check the lowest NIHF value outside the range of RISBG. define i64 @f4(i64 %dummy, i64 *%src) { -; CHECK: f4: -; CHECK: ngr +; CHECK-LABEL: f4: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: nihf %r0, 2 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: jl [[LABEL]] ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 4294967296 seq_cst + %res = atomicrmw and i64 *%src, i64 12884901887 seq_cst ret i64 %res } -; Check the low end of the NIHH range. +; Check the next value up, which must use a register. define i64 @f5(i64 %dummy, i64 *%src) { -; CHECK: f5: -; CHECK: nihh %r0, 0 +; CHECK-LABEL: f5: +; CHECK: ngr ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 281474976710655 seq_cst + %res = atomicrmw and i64 *%src, i64 12884901888 seq_cst ret i64 %res } -; Check the next value up, which must use a register. +; Check the lowest NIHH value outside the range of RISBG. define i64 @f6(i64 %dummy, i64 *%src) { -; CHECK: f6: -; CHECK: ngr +; CHECK-LABEL: f6: +; CHECK: nihh {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 281474976710656 seq_cst + %res = atomicrmw and i64 *%src, i64 844424930131967 seq_cst ret i64 %res } -; Check the highest useful NILL value. +; Check the next value up, which must use a register. 
define i64 @f7(i64 %dummy, i64 *%src) { -; CHECK: f7: -; CHECK: nill %r0, 65534 +; CHECK-LABEL: f7: +; CHECK: ngr ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 -2 seq_cst + %res = atomicrmw and i64 *%src, i64 281474976710656 seq_cst ret i64 %res } -; Check the low end of the NILL range. +; Check the highest NILL value outside the range of RISBG. define i64 @f8(i64 %dummy, i64 *%src) { -; CHECK: f8: -; CHECK: nill %r0, 0 +; CHECK-LABEL: f8: +; CHECK: nill {{%r[0-5]}}, 65530 ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 -65536 seq_cst + %res = atomicrmw and i64 *%src, i64 -6 seq_cst ret i64 %res } -; Check the highest useful NILH value, which is one less than the above. +; Check the lowest NILL value outside the range of RISBG. define i64 @f9(i64 %dummy, i64 *%src) { -; CHECK: f9: -; CHECK: nilh %r0, 65534 +; CHECK-LABEL: f9: +; CHECK: nill {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 -65537 seq_cst + %res = atomicrmw and i64 *%src, i64 -65534 seq_cst ret i64 %res } -; Check the highest useful NILF value, which is one less than the above. +; Check the highest useful NILF value. define i64 @f10(i64 %dummy, i64 *%src) { -; CHECK: f10: -; CHECK: nilf %r0, 4294901758 +; CHECK-LABEL: f10: +; CHECK: nilf {{%r[0-5]}}, 4294901758 ; CHECK: br %r14 %res = atomicrmw and i64 *%src, i64 -65538 seq_cst ret i64 %res } -; Check the low end of the NILH range. +; Check the highest NILH value outside the range of RISBG. define i64 @f11(i64 %dummy, i64 *%src) { -; CHECK: f11: -; CHECK: nilh %r0, 0 +; CHECK-LABEL: f11: +; CHECK: nilh {{%r[0-5]}}, 65530 ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 -4294901761 seq_cst + %res = atomicrmw and i64 *%src, i64 -327681 seq_cst ret i64 %res } -; Check the low end of the NILF range. +; Check the lowest NILH value outside the range of RISBG. 
define i64 @f12(i64 %dummy, i64 *%src) { -; CHECK: f12: -; CHECK: nilf %r0, 0 +; CHECK-LABEL: f12: +; CHECK: nilh {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 -4294967296 seq_cst + %res = atomicrmw and i64 *%src, i64 -4294770689 seq_cst ret i64 %res } -; Check the highest useful NIHL value, which is one less than the above. +; Check the lowest NILF value outside the range of RISBG. define i64 @f13(i64 %dummy, i64 *%src) { -; CHECK: f13: -; CHECK: nihl %r0, 65534 +; CHECK-LABEL: f13: +; CHECK: nilf {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 -4294967297 seq_cst + %res = atomicrmw and i64 *%src, i64 -4294967294 seq_cst ret i64 %res } -; Check the low end of the NIHL range. +; Check the highest NIHL value outside the range of RISBG. define i64 @f14(i64 %dummy, i64 *%src) { -; CHECK: f14: -; CHECK: nihl %r0, 0 +; CHECK-LABEL: f14: +; CHECK: nihl {{%r[0-5]}}, 65530 ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 -281470681743361 seq_cst + %res = atomicrmw and i64 *%src, i64 -21474836481 seq_cst ret i64 %res } -; Check the highest useful NIHH value, which is 1<<32 less than the above. +; Check the lowest NIHL value outside the range of RISBG. define i64 @f15(i64 %dummy, i64 *%src) { -; CHECK: f15: -; CHECK: nihh %r0, 65534 +; CHECK-LABEL: f15: +; CHECK: nihl {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw and i64 *%src, i64 -281474976710657 seq_cst + %res = atomicrmw and i64 *%src, i64 -281462091808769 seq_cst ret i64 %res } -; Check the highest useful NIHF value, which is 1<<32 less than the above. +; Check the highest NIHH value outside the range of RISBG. define i64 @f16(i64 %dummy, i64 *%src) { -; CHECK: f16: -; CHECK: nihf %r0, 4294901758 +; CHECK-LABEL: f16: +; CHECK: nihh {{%r[0-5]}}, 65530 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -1407374883553281 seq_cst + ret i64 %res +} + +; Check the highest useful NIHF value. 
+define i64 @f17(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f17: +; CHECK: nihf {{%r[0-5]}}, 4294901758 ; CHECK: br %r14 %res = atomicrmw and i64 *%src, i64 -281479271677953 seq_cst ret i64 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll index c6ec77e91b3d3..2b750c46e2611 100644 --- a/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll @@ -13,23 +13,22 @@ ; before being used, and that the low bits are set to 1. This sequence is ; independent of the other loop prologue instructions. define i8 @f1(i8 *%src, i8 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: cr [[ROT]], %r3 -; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: crjle [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 39, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -37,10 +36,10 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: crjle {{%r[0-9]+}}, %r3 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -50,23 +49,22 @@ define i8 @f1(i8 *%src, i8 %b) { ; Check signed maximum. 
define i8 @f2(i8 *%src, i8 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: cr [[ROT]], %r3 -; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: crjhe [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 39, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -74,10 +72,10 @@ define i8 @f2(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: crjhe {{%r[0-9]+}}, %r3 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -87,23 +85,22 @@ define i8 @f2(i8 *%src, i8 %b) { ; Check unsigned minimum. 
define i8 @f3(i8 *%src, i8 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: clr [[ROT]], %r3 -; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: clrjle [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 39, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -111,10 +108,10 @@ define i8 @f3(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: clrjle {{%r[0-9]+}}, %r3, ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -124,23 +121,22 @@ define i8 @f3(i8 *%src, i8 %b) { ; Check unsigned maximum. 
define i8 @f4(i8 *%src, i8 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: clr [[ROT]], %r3 -; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: clrjhe [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 39, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -148,10 +144,10 @@ define i8 @f4(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: clrjhe {{%r[0-9]+}}, %r3, ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -162,15 +158,15 @@ define i8 @f4(i8 *%src, i8 %b) { ; Check the lowest useful signed minimum value. We need to load 0x81000000 ; into the source register. define i8 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: llilh [[SRC2:%r[0-9]+]], 33024 -; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: crjle [[ROT:%r[0-9]+]], [[SRC2]] ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw min i8 *%src, i8 -127 seq_cst ret i8 %res @@ -179,15 +175,15 @@ define i8 @f5(i8 *%src) { ; Check the highest useful signed maximum value. We need to load 0x7e000000 ; into the source register. 
define i8 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: llilh [[SRC2:%r[0-9]+]], 32256 -; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: crjhe [[ROT:%r[0-9]+]], [[SRC2]] ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw max i8 *%src, i8 126 seq_cst ret i8 %res @@ -196,15 +192,15 @@ define i8 @f6(i8 *%src) { ; Check the lowest useful unsigned minimum value. We need to load 0x01000000 ; into the source register. define i8 @f7(i8 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llilh [[SRC2:%r[0-9]+]], 256 -; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: clrjle [[ROT:%r[0-9]+]], [[SRC2]], ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f7: +; CHECK-SHIFT1-LABEL: f7: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f7: +; CHECK-SHIFT2-LABEL: f7: ; CHECK-SHIFT2: br %r14 %res = atomicrmw umin i8 *%src, i8 1 seq_cst ret i8 %res @@ -213,15 +209,15 @@ define i8 @f7(i8 *%src) { ; Check the highest useful unsigned maximum value. We need to load 0xfe000000 ; into the source register. define i8 @f8(i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: llilh [[SRC2:%r[0-9]+]], 65024 -; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: clrjhe [[ROT:%r[0-9]+]], [[SRC2]], ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f8: +; CHECK-SHIFT1-LABEL: f8: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f8: +; CHECK-SHIFT2-LABEL: f8: ; CHECK-SHIFT2: br %r14 %res = atomicrmw umax i8 *%src, i8 254 seq_cst ret i8 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll index 9612e99b73874..98ffedf28c691 100644 --- a/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll @@ -13,23 +13,22 @@ ; before being used, and that the low bits are set to 1. 
This sequence is ; independent of the other loop prologue instructions. define i16 @f1(i16 *%src, i16 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: cr [[ROT]], %r3 -; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: crjle [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 47, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -37,10 +36,10 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: crjle {{%r[0-9]+}}, %r3 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -50,23 +49,22 @@ define i16 @f1(i16 *%src, i16 %b) { ; Check signed maximum. 
define i16 @f2(i16 *%src, i16 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: cr [[ROT]], %r3 -; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: crjhe [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 47, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -74,10 +72,10 @@ define i16 @f2(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: crjhe {{%r[0-9]+}}, %r3 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -87,23 +85,22 @@ define i16 @f2(i16 *%src, i16 %b) { ; Check unsigned minimum. 
define i16 @f3(i16 *%src, i16 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: clr [[ROT]], %r3 -; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: clrjle [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 47, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -111,10 +108,10 @@ define i16 @f3(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: clrjle {{%r[0-9]+}}, %r3, ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -124,23 +121,22 @@ define i16 @f3(i16 *%src, i16 %b) { ; Check unsigned maximum. 
define i16 @f4(i16 *%src, i16 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: clr [[ROT]], %r3 -; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: clrjhe [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 47, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -148,10 +144,10 @@ define i16 @f4(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: clrjhe {{%r[0-9]+}}, %r3, ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -162,15 +158,15 @@ define i16 @f4(i16 *%src, i16 %b) { ; Check the lowest useful signed minimum value. We need to load 0x80010000 ; into the source register. define i16 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: llilh [[SRC2:%r[0-9]+]], 32769 -; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: crjle [[ROT:%r[0-9]+]], [[SRC2]] ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw min i16 *%src, i16 -32767 seq_cst ret i16 %res @@ -179,15 +175,15 @@ define i16 @f5(i16 *%src) { ; Check the highest useful signed maximum value. We need to load 0x7ffe0000 ; into the source register. 
define i16 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: llilh [[SRC2:%r[0-9]+]], 32766 -; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: crjhe [[ROT:%r[0-9]+]], [[SRC2]] ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw max i16 *%src, i16 32766 seq_cst ret i16 %res @@ -196,15 +192,15 @@ define i16 @f6(i16 *%src) { ; Check the lowest useful unsigned maximum value. We need to load 0x00010000 ; into the source register. define i16 @f7(i16 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llilh [[SRC2:%r[0-9]+]], 1 -; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: clrjle [[ROT:%r[0-9]+]], [[SRC2]], ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f7: +; CHECK-SHIFT1-LABEL: f7: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f7: +; CHECK-SHIFT2-LABEL: f7: ; CHECK-SHIFT2: br %r14 %res = atomicrmw umin i16 *%src, i16 1 seq_cst ret i16 %res @@ -213,15 +209,15 @@ define i16 @f7(i16 *%src) { ; Check the highest useful unsigned maximum value. We need to load 0xfffe0000 ; into the source register. define i16 @f8(i16 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: llilh [[SRC2:%r[0-9]+]], 65534 -; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: clrjhe [[ROT:%r[0-9]+]], [[SRC2]], ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f8: +; CHECK-SHIFT1-LABEL: f8: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f8: +; CHECK-SHIFT2-LABEL: f8: ; CHECK-SHIFT2: br %r14 %res = atomicrmw umax i16 *%src, i16 65534 seq_cst ret i16 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll index b5809bdc1693f..f2152c6f28bc9 100644 --- a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll @@ -1,18 +1,18 @@ -; Test 32-bit atomic minimum and maximum. 
+; Test 32-bit atomic minimum and maximum. Here we match the z10 versions, +; which can't use LOCR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check signed minium. define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: cr %r2, %r4 ; CHECK: lr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: crjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 ; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw min i32 *%src, i32 %b seq_cst ret i32 %res @@ -20,15 +20,14 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; Check signed maximum. define i32 @f2(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: cr %r2, %r4 ; CHECK: lr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: crjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 ; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw max i32 *%src, i32 %b seq_cst ret i32 %res @@ -36,15 +35,14 @@ define i32 @f2(i32 %dummy, i32 *%src, i32 %b) { ; Check unsigned minimum. define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: clr %r2, %r4 ; CHECK: lr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: clrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 ; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw umin i32 *%src, i32 %b seq_cst ret i32 %res @@ -52,15 +50,14 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { ; Check unsigned maximum. 
define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: clr %r2, %r4 ; CHECK: lr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: clrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 ; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw umax i32 *%src, i32 %b seq_cst ret i32 %res @@ -68,7 +65,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { ; Check the high end of the aligned CS range. define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: l %r2, 4092(%r3) ; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3) ; CHECK: br %r14 @@ -79,7 +76,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { ; Check the next word up, which requires CSY. define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ly %r2, 4096(%r3) ; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3) ; CHECK: br %r14 @@ -90,7 +87,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { ; Check the high end of the aligned CSY range. define i32 @f7(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ly %r2, 524284(%r3) ; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3) ; CHECK: br %r14 @@ -101,7 +98,7 @@ define i32 @f7(i32 %dummy, i32 *%src, i32 %b) { ; Check the next word up, which needs separate address logic. define i32 @f8(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r3, 524288 ; CHECK: l %r2, 0(%r3) ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) @@ -113,7 +110,7 @@ define i32 @f8(i32 %dummy, i32 *%src, i32 %b) { ; Check the high end of the negative aligned CSY range. define i32 @f9(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ly %r2, -4(%r3) ; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3) ; CHECK: br %r14 @@ -124,7 +121,7 @@ define i32 @f9(i32 %dummy, i32 *%src, i32 %b) { ; Check the low end of the CSY range. 
define i32 @f10(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ly %r2, -524288(%r3) ; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3) ; CHECK: br %r14 @@ -135,7 +132,7 @@ define i32 @f10(i32 %dummy, i32 *%src, i32 %b) { ; Check the next word down, which needs separate address logic. define i32 @f11(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r3, -524292 ; CHECK: l %r2, 0(%r3) ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) @@ -147,7 +144,7 @@ define i32 @f11(i32 %dummy, i32 *%src, i32 %b) { ; Check that indexed addresses are not allowed. define i32 @f12(i32 %dummy, i64 %base, i64 %index, i32 %b) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: agr %r3, %r4 ; CHECK: l %r2, 0(%r3) ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) @@ -158,18 +155,17 @@ define i32 @f12(i32 %dummy, i64 %base, i64 %index, i32 %b) { ret i32 %res } -; Check that constants are forced into a register. +; Check that constants are handled. define i32 @f13(i32 %dummy, i32 *%ptr) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: lhi [[LIMIT:%r[0-9]+]], 42 ; CHECK: l %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: cr %r2, [[LIMIT]] ; CHECK: lr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}le [[KEEP:\..*]] -; CHECK: lr [[NEW]], [[LIMIT]] +; CHECK: crjle %r2, [[LIMIT]], [[KEEP:\..*]] +; CHECK: lhi [[NEW]], 42 ; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw min i32 *%ptr, i32 42 seq_cst ret i32 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll index 68978547d3e9f..037eb1aa9367c 100644 --- a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll @@ -1,18 +1,18 @@ -; Test 64-bit atomic minimum and maximum. +; Test 64-bit atomic minimum and maximum. Here we match the z10 versions, +; which can't use LOCGR. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check signed minium. define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: cgr %r2, %r4 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: cgrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 ; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw min i64 *%src, i64 %b seq_cst ret i64 %res @@ -20,15 +20,14 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { ; Check signed maximum. define i64 @f2(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: cgr %r2, %r4 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: cgrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 ; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw max i64 *%src, i64 %b seq_cst ret i64 %res @@ -36,15 +35,14 @@ define i64 @f2(i64 %dummy, i64 *%src, i64 %b) { ; Check unsigned minimum. define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: clgr %r2, %r4 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: clgrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 ; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw umin i64 *%src, i64 %b seq_cst ret i64 %res @@ -52,15 +50,14 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { ; Check unsigned maximum. 
define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: clgr %r2, %r4 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: clgrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 ; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw umax i64 *%src, i64 %b seq_cst ret i64 %res @@ -68,7 +65,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { ; Check the high end of the aligned CSG range. define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lg %r2, 524280(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3) ; CHECK: br %r14 @@ -79,7 +76,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { ; Check the next doubleword up, which requires separate address logic. define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: lg %r2, 0(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) @@ -91,7 +88,7 @@ define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { ; Check the low end of the CSG range. define i64 @f7(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lg %r2, -524288(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3) ; CHECK: br %r14 @@ -102,7 +99,7 @@ define i64 @f7(i64 %dummy, i64 *%src, i64 %b) { ; Check the next doubleword down, which requires separate address logic. define i64 @f8(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r3, -524296 ; CHECK: lg %r2, 0(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) @@ -114,7 +111,7 @@ define i64 @f8(i64 %dummy, i64 *%src, i64 %b) { ; Check that indexed addresses are not allowed. 
define i64 @f9(i64 %dummy, i64 %base, i64 %index, i64 %b) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agr %r3, %r4 ; CHECK: lg %r2, 0(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) @@ -125,18 +122,17 @@ define i64 @f9(i64 %dummy, i64 %base, i64 %index, i64 %b) { ret i64 %res } -; Check that constants are forced into a register. +; Check that constants are handled. define i64 @f10(i64 %dummy, i64 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: lghi [[LIMIT:%r[0-9]+]], 42 ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: cgr %r2, [[LIMIT]] ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: j{{g?}}le [[KEEP:\..*]] -; CHECK: lgr [[NEW]], [[LIMIT]] +; CHECK: cgrjle %r2, [[LIMIT]], [[KEEP:\..*]] +; CHECK: lghi [[NEW]], 42 ; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: jl [[LOOP]] ; CHECK: br %r14 %res = atomicrmw min i64 *%ptr, i64 42 seq_cst ret i64 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-01.ll b/test/CodeGen/SystemZ/atomicrmw-nand-01.ll index 1ede3b465be2c..db5bb8ff9e790 100644 --- a/test/CodeGen/SystemZ/atomicrmw-nand-01.ll +++ b/test/CodeGen/SystemZ/atomicrmw-nand-01.ll @@ -13,7 +13,7 @@ ; before being used, and that the low bits are set to 1. This sequence is ; independent of the other loop prologue instructions. 
define i8 @f1(i8 *%src, i8 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -23,11 +23,11 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK: xilf [[ROT]], 4278190080 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -35,7 +35,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: oilf %r3, 16777215 ; CHECK-SHIFT2: rll @@ -49,7 +49,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; Check the minimum signed value. We AND the rotated word with 0x80ffffff. define i8 @f2(i8 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -59,11 +59,11 @@ define i8 @f2(i8 *%src) { ; CHECK: xilf [[ROT]], 4278190080 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -71,7 +71,7 @@ define i8 @f2(i8 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i8 *%src, i8 -128 seq_cst ret i8 %res @@ -79,14 +79,14 @@ define i8 @f2(i8 *%src) { ; Check NANDs of -2 (-1 isn't useful). We AND the rotated word with 0xfeffffff. 
define i8 @f3(i8 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: nilh [[ROT]], 65279 ; CHECK: xilf [[ROT]], 4278190080 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i8 *%src, i8 -2 seq_cst ret i8 %res @@ -94,14 +94,14 @@ define i8 @f3(i8 *%src) { ; Check NANDs of 1. We AND the rotated word with 0x01ffffff. define i8 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: nilh [[ROT]], 511 ; CHECK: xilf [[ROT]], 4278190080 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i8 *%src, i8 1 seq_cst ret i8 %res @@ -109,14 +109,14 @@ define i8 @f4(i8 *%src) { ; Check the maximum signed value. We AND the rotated word with 0x7fffffff. define i8 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: nilh [[ROT]], 32767 ; CHECK: xilf [[ROT]], 4278190080 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i8 *%src, i8 127 seq_cst ret i8 %res @@ -125,14 +125,14 @@ define i8 @f5(i8 *%src) { ; Check NANDs of a large unsigned value. We AND the rotated word with ; 0xfdffffff. 
define i8 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: nilh [[ROT]], 65023 ; CHECK: xilf [[ROT]], 4278190080 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i8 *%src, i8 253 seq_cst ret i8 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-02.ll b/test/CodeGen/SystemZ/atomicrmw-nand-02.ll index d5cf864a3f795..6141543e0db2d 100644 --- a/test/CodeGen/SystemZ/atomicrmw-nand-02.ll +++ b/test/CodeGen/SystemZ/atomicrmw-nand-02.ll @@ -13,7 +13,7 @@ ; before being used, and that the low bits are set to 1. This sequence is ; independent of the other loop prologue instructions. define i16 @f1(i16 *%src, i16 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -23,11 +23,11 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK: xilf [[ROT]], 4294901760 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -35,7 +35,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: oill %r3, 65535 ; CHECK-SHIFT2: rll @@ -49,7 +49,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; Check the minimum signed value. We AND the rotated word with 0x8000ffff. 
define i16 @f2(i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -59,11 +59,11 @@ define i16 @f2(i16 *%src) { ; CHECK: xilf [[ROT]], 4294901760 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -71,7 +71,7 @@ define i16 @f2(i16 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i16 *%src, i16 -32768 seq_cst ret i16 %res @@ -79,14 +79,14 @@ define i16 @f2(i16 *%src) { ; Check NANDs of -2 (-1 isn't useful). We AND the rotated word with 0xfffeffff. define i16 @f3(i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: nilh [[ROT]], 65534 ; CHECK: xilf [[ROT]], 4294901760 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i16 *%src, i16 -2 seq_cst ret i16 %res @@ -94,14 +94,14 @@ define i16 @f3(i16 *%src) { ; Check ANDs of 1. We AND the rotated word with 0x0001ffff. define i16 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: nilh [[ROT]], 1 ; CHECK: xilf [[ROT]], 4294901760 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i16 *%src, i16 1 seq_cst ret i16 %res @@ -109,14 +109,14 @@ define i16 @f4(i16 *%src) { ; Check the maximum signed value. We AND the rotated word with 0x7fffffff. 
define i16 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: nilh [[ROT]], 32767 ; CHECK: xilf [[ROT]], 4294901760 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i16 *%src, i16 32767 seq_cst ret i16 %res @@ -125,14 +125,14 @@ define i16 @f5(i16 *%src) { ; Check NANDs of a large unsigned value. We AND the rotated word with ; 0xfffdffff. define i16 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: nilh [[ROT]], 65533 ; CHECK: xilf [[ROT]], 4294901760 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw nand i16 *%src, i16 65533 seq_cst ret i16 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-03.ll b/test/CodeGen/SystemZ/atomicrmw-nand-03.ll index cc2a0866b391c..c7a6691083d3e 100644 --- a/test/CodeGen/SystemZ/atomicrmw-nand-03.ll +++ b/test/CodeGen/SystemZ/atomicrmw-nand-03.ll @@ -1,17 +1,17 @@ ; Test 32-bit atomic NANDs. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check NANDs of a variable. define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lr %r0, %r2 ; CHECK: nr %r0, %r4 ; CHECK: xilf %r0, 4294967295 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw nand i32 *%src, i32 %b seq_cst ret i32 %res @@ -19,14 +19,14 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; Check NANDs of 1. 
define i32 @f2(i32 %dummy, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lr %r0, %r2 ; CHECK: nilf %r0, 1 ; CHECK: xilf %r0, 4294967295 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw nand i32 *%src, i32 1 seq_cst ret i32 %res @@ -34,7 +34,7 @@ define i32 @f2(i32 %dummy, i32 *%src) { ; Check NANDs of the low end of the NILH range. define i32 @f3(i32 %dummy, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: nilh %r0, 0 ; CHECK: xilf %r0, 4294967295 ; CHECK: br %r14 @@ -44,7 +44,7 @@ define i32 @f3(i32 %dummy, i32 *%src) { ; Check the next value up, which must use NILF. define i32 @f4(i32 %dummy, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: nilf %r0, 65536 ; CHECK: xilf %r0, 4294967295 ; CHECK: br %r14 @@ -54,7 +54,7 @@ define i32 @f4(i32 %dummy, i32 *%src) { ; Check the largest useful NILL value. define i32 @f5(i32 %dummy, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: nill %r0, 65534 ; CHECK: xilf %r0, 4294967295 ; CHECK: br %r14 @@ -64,7 +64,7 @@ define i32 @f5(i32 %dummy, i32 *%src) { ; Check the low end of the NILL range. define i32 @f6(i32 %dummy, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: nill %r0, 0 ; CHECK: xilf %r0, 4294967295 ; CHECK: br %r14 @@ -74,7 +74,7 @@ define i32 @f6(i32 %dummy, i32 *%src) { ; Check the largest useful NILH value, which is one less than the above. define i32 @f7(i32 %dummy, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: nilh %r0, 65534 ; CHECK: xilf %r0, 4294967295 ; CHECK: br %r14 @@ -84,7 +84,7 @@ define i32 @f7(i32 %dummy, i32 *%src) { ; Check the highest useful NILF value, which is one less than the above. 
define i32 @f8(i32 %dummy, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: nilf %r0, 4294901758 ; CHECK: xilf %r0, 4294967295 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-04.ll b/test/CodeGen/SystemZ/atomicrmw-nand-04.ll index 0c857d97fe83c..91fe639cd726a 100644 --- a/test/CodeGen/SystemZ/atomicrmw-nand-04.ll +++ b/test/CodeGen/SystemZ/atomicrmw-nand-04.ll @@ -1,10 +1,10 @@ ; Test 64-bit atomic NANDs. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check NANDs of a variable. define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lgr %r0, %r2 @@ -12,171 +12,165 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { ; CHECK: lcgr %r0, %r0 ; CHECK: aghi %r0, -1 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw nand i64 *%src, i64 %b seq_cst ret i64 %res } -; Check NANDs of 1, which must be done using a register. +; Check NANDs of 1, which are done using a register. (We could use RISBG +; instead, but that isn't implemented yet.) define i64 @f2(i64 %dummy, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ngr ; CHECK: br %r14 %res = atomicrmw nand i64 *%src, i64 1 seq_cst ret i64 %res } -; Check the low end of the NIHF range. +; Check the equivalent of NIHF with 1, which can use RISBG instead. 
define i64 @f3(i64 %dummy, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: lgr %r0, %r2 -; CHECK: nihf %r0, 0 +; CHECK: risbg %r0, %r2, 31, 191, 0 ; CHECK: lcgr %r0, %r0 ; CHECK: aghi %r0, -1 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 4294967295 seq_cst + %res = atomicrmw nand i64 *%src, i64 8589934591 seq_cst ret i64 %res } -; Check the next value up, which must use a register. +; Check the lowest NIHF value outside the range of RISBG. define i64 @f4(i64 %dummy, i64 *%src) { -; CHECK: f4: -; CHECK: ngr +; CHECK-LABEL: f4: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: nihf %r0, 2 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: jl [[LABEL]] ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 4294967296 seq_cst + %res = atomicrmw nand i64 *%src, i64 12884901887 seq_cst ret i64 %res } -; Check the low end of the NIHH range. +; Check the next value up, which must use a register. define i64 @f5(i64 %dummy, i64 *%src) { -; CHECK: f5: -; CHECK: nihh %r0, 0 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f5: +; CHECK: ngr ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 281474976710655 seq_cst + %res = atomicrmw nand i64 *%src, i64 12884901888 seq_cst ret i64 %res } -; Check the next value up, which must use a register. +; Check the lowest NIHH value outside the range of RISBG. define i64 @f6(i64 %dummy, i64 *%src) { -; CHECK: f6: -; CHECK: ngr +; CHECK-LABEL: f6: +; CHECK: nihh {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 281474976710656 seq_cst + %res = atomicrmw nand i64 *%src, i64 844424930131967 seq_cst ret i64 %res } -; Check the highest useful NILL value. +; Check the next value up, which must use a register. 
define i64 @f7(i64 %dummy, i64 *%src) { -; CHECK: f7: -; CHECK: nill %r0, 65534 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f7: +; CHECK: ngr ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 -2 seq_cst + %res = atomicrmw nand i64 *%src, i64 281474976710656 seq_cst ret i64 %res } -; Check the low end of the NILL range. +; Check the highest NILL value outside the range of RISBG. define i64 @f8(i64 %dummy, i64 *%src) { -; CHECK: f8: -; CHECK: nill %r0, 0 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f8: +; CHECK: nill {{%r[0-5]}}, 65530 ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 -65536 seq_cst + %res = atomicrmw nand i64 *%src, i64 -6 seq_cst ret i64 %res } -; Check the highest useful NILH value, which is one less than the above. +; Check the lowest NILL value outside the range of RISBG. define i64 @f9(i64 %dummy, i64 *%src) { -; CHECK: f9: -; CHECK: nilh %r0, 65534 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f9: +; CHECK: nill {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 -65537 seq_cst + %res = atomicrmw nand i64 *%src, i64 -65534 seq_cst ret i64 %res } -; Check the highest useful NILF value, which is one less than the above. +; Check the highest useful NILF value. define i64 @f10(i64 %dummy, i64 *%src) { -; CHECK: f10: -; CHECK: nilf %r0, 4294901758 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f10: +; CHECK: nilf {{%r[0-5]}}, 4294901758 ; CHECK: br %r14 %res = atomicrmw nand i64 *%src, i64 -65538 seq_cst ret i64 %res } -; Check the low end of the NILH range. +; Check the highest NILH value outside the range of RISBG. 
define i64 @f11(i64 %dummy, i64 *%src) { -; CHECK: f11: -; CHECK: nilh %r0, 0 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f11: +; CHECK: nilh {{%r[0-5]}}, 65530 ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 -4294901761 seq_cst + %res = atomicrmw nand i64 *%src, i64 -327681 seq_cst ret i64 %res } -; Check the low end of the NILF range. +; Check the lowest NILH value outside the range of RISBG. define i64 @f12(i64 %dummy, i64 *%src) { -; CHECK: f12: -; CHECK: nilf %r0, 0 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f12: +; CHECK: nilh {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 -4294967296 seq_cst + %res = atomicrmw nand i64 *%src, i64 -4294770689 seq_cst ret i64 %res } -; Check the highest useful NIHL value, which is one less than the above. +; Check the lowest NILF value outside the range of RISBG. define i64 @f13(i64 %dummy, i64 *%src) { -; CHECK: f13: -; CHECK: nihl %r0, 65534 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f13: +; CHECK: nilf {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 -4294967297 seq_cst + %res = atomicrmw nand i64 *%src, i64 -4294967294 seq_cst ret i64 %res } -; Check the low end of the NIHL range. +; Check the highest NIHL value outside the range of RISBG. define i64 @f14(i64 %dummy, i64 *%src) { -; CHECK: f14: -; CHECK: nihl %r0, 0 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f14: +; CHECK: nihl {{%r[0-5]}}, 65530 ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 -281470681743361 seq_cst + %res = atomicrmw nand i64 *%src, i64 -21474836481 seq_cst ret i64 %res } -; Check the highest useful NIHH value, which is 1<<32 less than the above. +; Check the lowest NIHL value outside the range of RISBG. 
define i64 @f15(i64 %dummy, i64 *%src) { -; CHECK: f15: -; CHECK: nihh %r0, 65534 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f15: +; CHECK: nihl {{%r[0-5]}}, 2 ; CHECK: br %r14 - %res = atomicrmw nand i64 *%src, i64 -281474976710657 seq_cst + %res = atomicrmw nand i64 *%src, i64 -281462091808769 seq_cst ret i64 %res } -; Check the highest useful NIHF value, which is 1<<32 less than the above. +; Check the highest NIHH value outside the range of RISBG. define i64 @f16(i64 %dummy, i64 *%src) { -; CHECK: f16: -; CHECK: nihf %r0, 4294901758 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK-LABEL: f16: +; CHECK: nihh {{%r[0-5]}}, 65530 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -1407374883553281 seq_cst + ret i64 %res +} + +; Check the highest useful NIHF value. +define i64 @f17(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f17: +; CHECK: nihf {{%r[0-5]}}, 4294901758 ; CHECK: br %r14 %res = atomicrmw nand i64 *%src, i64 -281479271677953 seq_cst ret i64 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-or-01.ll b/test/CodeGen/SystemZ/atomicrmw-or-01.ll index 31303b769237b..caba621addc0a 100644 --- a/test/CodeGen/SystemZ/atomicrmw-or-01.ll +++ b/test/CodeGen/SystemZ/atomicrmw-or-01.ll @@ -13,7 +13,7 @@ ; before being used. This shift is independent of the other loop prologue ; instructions. 
define i8 @f1(i8 *%src, i8 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK: or [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: or {{%r[0-9]+}}, %r3 @@ -47,7 +47,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; Check the minimum signed value. We OR the rotated word with 0x80000000. define i8 @f2(i8 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -56,11 +56,11 @@ define i8 @f2(i8 *%src) { ; CHECK: oilh [[ROT]], 32768 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -68,7 +68,7 @@ define i8 @f2(i8 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i8 *%src, i8 -128 seq_cst ret i8 %res @@ -76,13 +76,13 @@ define i8 @f2(i8 *%src) { ; Check ORs of -2 (-1 isn't useful). We OR the rotated word with 0xfe000000. 
define i8 @f3(i8 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: oilh [[ROT]], 65024 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i8 *%src, i8 -2 seq_cst ret i8 %res @@ -90,13 +90,13 @@ define i8 @f3(i8 *%src) { ; Check ORs of 1. We OR the rotated word with 0x01000000. define i8 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: oilh [[ROT]], 256 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i8 *%src, i8 1 seq_cst ret i8 %res @@ -104,13 +104,13 @@ define i8 @f4(i8 *%src) { ; Check the maximum signed value. We OR the rotated word with 0x7f000000. define i8 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: oilh [[ROT]], 32512 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i8 *%src, i8 127 seq_cst ret i8 %res @@ -119,13 +119,13 @@ define i8 @f5(i8 *%src) { ; Check ORs of a large unsigned value. We OR the rotated word with ; 0xfd000000. define i8 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: oilh [[ROT]], 64768 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i8 *%src, i8 253 seq_cst ret i8 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-or-02.ll b/test/CodeGen/SystemZ/atomicrmw-or-02.ll index 9880d0b9859ff..877c642a35ae2 100644 --- a/test/CodeGen/SystemZ/atomicrmw-or-02.ll +++ b/test/CodeGen/SystemZ/atomicrmw-or-02.ll @@ -13,7 +13,7 @@ ; before being used. This shift is independent of the other loop prologue ; instructions. 
define i16 @f1(i16 *%src, i16 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK: or [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: or {{%r[0-9]+}}, %r3 @@ -47,7 +47,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; Check the minimum signed value. We OR the rotated word with 0x80000000. define i16 @f2(i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -56,11 +56,11 @@ define i16 @f2(i16 *%src) { ; CHECK: oilh [[ROT]], 32768 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -68,7 +68,7 @@ define i16 @f2(i16 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i16 *%src, i16 -32768 seq_cst ret i16 %res @@ -76,13 +76,13 @@ define i16 @f2(i16 *%src) { ; Check ORs of -2 (-1 isn't useful). We OR the rotated word with 0xfffe0000. 
define i16 @f3(i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: oilh [[ROT]], 65534 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i16 *%src, i16 -2 seq_cst ret i16 %res @@ -90,13 +90,13 @@ define i16 @f3(i16 *%src) { ; Check ORs of 1. We OR the rotated word with 0x00010000. define i16 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: oilh [[ROT]], 1 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i16 *%src, i16 1 seq_cst ret i16 %res @@ -104,13 +104,13 @@ define i16 @f4(i16 *%src) { ; Check the maximum signed value. We OR the rotated word with 0x7fff0000. define i16 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: oilh [[ROT]], 32767 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i16 *%src, i16 32767 seq_cst ret i16 %res @@ -119,13 +119,13 @@ define i16 @f5(i16 *%src) { ; Check ORs of a large unsigned value. We OR the rotated word with ; 0xfffd0000. define i16 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: oilh [[ROT]], 65533 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw or i16 *%src, i16 65533 seq_cst ret i16 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-or-03.ll b/test/CodeGen/SystemZ/atomicrmw-or-03.ll index 33fd21b04c6ca..9a0aa86feb5df 100644 --- a/test/CodeGen/SystemZ/atomicrmw-or-03.ll +++ b/test/CodeGen/SystemZ/atomicrmw-or-03.ll @@ -1,16 +1,16 @@ ; Test 32-bit atomic ORs. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check ORs of a variable. define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lr %r0, %r2 ; CHECK: or %r0, %r4 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw or i32 *%src, i32 %b seq_cst ret i32 %res @@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; Check the lowest useful OILL value. define i32 @f2(i32 %dummy, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lr %r0, %r2 ; CHECK: oill %r0, 1 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw or i32 *%src, i32 1 seq_cst ret i32 %res @@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) { ; Check the high end of the OILL range. define i32 @f3(i32 %dummy, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: oill %r0, 65535 ; CHECK: br %r14 %res = atomicrmw or i32 *%src, i32 65535 seq_cst @@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) { ; Check the lowest useful OILH value, which is the next value up. define i32 @f4(i32 %dummy, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: oilh %r0, 1 ; CHECK: br %r14 %res = atomicrmw or i32 *%src, i32 65536 seq_cst @@ -50,7 +50,7 @@ define i32 @f4(i32 %dummy, i32 *%src) { ; Check the lowest useful OILF value, which is the next value up. define i32 @f5(i32 %dummy, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: oilf %r0, 65537 ; CHECK: br %r14 %res = atomicrmw or i32 *%src, i32 65537 seq_cst @@ -59,7 +59,7 @@ define i32 @f5(i32 %dummy, i32 *%src) { ; Check the high end of the OILH range. 
define i32 @f6(i32 %dummy, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: oilh %r0, 65535 ; CHECK: br %r14 %res = atomicrmw or i32 *%src, i32 -65536 seq_cst @@ -68,7 +68,7 @@ define i32 @f6(i32 %dummy, i32 *%src) { ; Check the next value up, which must use OILF. define i32 @f7(i32 %dummy, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: oilf %r0, 4294901761 ; CHECK: br %r14 %res = atomicrmw or i32 *%src, i32 -65535 seq_cst @@ -77,7 +77,7 @@ define i32 @f7(i32 %dummy, i32 *%src) { ; Check the largest useful OILF value. define i32 @f8(i32 %dummy, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: oilf %r0, 4294967294 ; CHECK: br %r14 %res = atomicrmw or i32 *%src, i32 -2 seq_cst diff --git a/test/CodeGen/SystemZ/atomicrmw-or-04.ll b/test/CodeGen/SystemZ/atomicrmw-or-04.ll index a74f6f9dd501b..dbc0f11cc74c6 100644 --- a/test/CodeGen/SystemZ/atomicrmw-or-04.ll +++ b/test/CodeGen/SystemZ/atomicrmw-or-04.ll @@ -1,16 +1,16 @@ ; Test 64-bit atomic ORs. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check ORs of a variable. define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: ogr %r0, %r4 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 %b seq_cst ret i64 %res @@ -18,13 +18,13 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { ; Check the lowest useful OILL value. 
define i64 @f2(i64 %dummy, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: oill %r0, 1 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 1 seq_cst ret i64 %res @@ -32,7 +32,7 @@ define i64 @f2(i64 %dummy, i64 *%src) { ; Check the high end of the OILL range. define i64 @f3(i64 %dummy, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: oill %r0, 65535 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 65535 seq_cst @@ -41,7 +41,7 @@ define i64 @f3(i64 %dummy, i64 *%src) { ; Check the lowest useful OILH value, which is the next value up. define i64 @f4(i64 %dummy, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: oilh %r0, 1 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 65536 seq_cst @@ -50,7 +50,7 @@ define i64 @f4(i64 %dummy, i64 *%src) { ; Check the lowest useful OILF value, which is the next value up again. define i64 @f5(i64 %dummy, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: oilf %r0, 65537 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 65537 seq_cst @@ -59,7 +59,7 @@ define i64 @f5(i64 %dummy, i64 *%src) { ; Check the high end of the OILH range. define i64 @f6(i64 %dummy, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: oilh %r0, 65535 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 4294901760 seq_cst @@ -68,7 +68,7 @@ define i64 @f6(i64 %dummy, i64 *%src) { ; Check the next value up, which must use OILF. define i64 @f7(i64 %dummy, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: oilf %r0, 4294901761 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 4294901761 seq_cst @@ -77,7 +77,7 @@ define i64 @f7(i64 %dummy, i64 *%src) { ; Check the high end of the OILF range. 
define i64 @f8(i64 %dummy, i64 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: oilf %r0, 4294967295 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 4294967295 seq_cst @@ -86,7 +86,7 @@ define i64 @f8(i64 %dummy, i64 *%src) { ; Check the lowest useful OIHL value, which is one greater than above. define i64 @f9(i64 %dummy, i64 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: oihl %r0, 1 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 4294967296 seq_cst @@ -96,7 +96,7 @@ define i64 @f9(i64 %dummy, i64 *%src) { ; Check the next value up, which must use a register. (We could use ; combinations of OIH* and OIL* instead, but that isn't implemented.) define i64 @f10(i64 %dummy, i64 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ogr ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 4294967297 seq_cst @@ -105,7 +105,7 @@ define i64 @f10(i64 %dummy, i64 *%src) { ; Check the high end of the OIHL range. define i64 @f11(i64 %dummy, i64 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: oihl %r0, 65535 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 281470681743360 seq_cst @@ -114,7 +114,7 @@ define i64 @f11(i64 %dummy, i64 *%src) { ; Check the lowest useful OIHH value, which is 1<<32 greater than above. define i64 @f12(i64 %dummy, i64 *%src) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: oihh %r0, 1 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 281474976710656 seq_cst @@ -123,7 +123,7 @@ define i64 @f12(i64 %dummy, i64 *%src) { ; Check the lowest useful OIHF value, which is 1<<32 greater again. define i64 @f13(i64 %dummy, i64 *%src) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: oihf %r0, 65537 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 281479271677952 seq_cst @@ -132,7 +132,7 @@ define i64 @f13(i64 %dummy, i64 *%src) { ; Check the high end of the OIHH range. 
define i64 @f14(i64 %dummy, i64 *%src) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: oihh %r0, 65535 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 18446462598732840960 seq_cst @@ -141,7 +141,7 @@ define i64 @f14(i64 %dummy, i64 *%src) { ; Check the next value up, which must use a register. define i64 @f15(i64 %dummy, i64 *%src) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: ogr ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 18446462598732840961 seq_cst @@ -150,7 +150,7 @@ define i64 @f15(i64 %dummy, i64 *%src) { ; Check the high end of the OIHF range. define i64 @f16(i64 %dummy, i64 *%src) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: oihf %r0, 4294967295 ; CHECK: br %r14 %res = atomicrmw or i64 *%src, i64 -4294967296 seq_cst diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-01.ll b/test/CodeGen/SystemZ/atomicrmw-sub-01.ll index d073dc5ec29fa..2c08ebd9f5fc0 100644 --- a/test/CodeGen/SystemZ/atomicrmw-sub-01.ll +++ b/test/CodeGen/SystemZ/atomicrmw-sub-01.ll @@ -13,7 +13,7 @@ ; before being used. This shift is independent of the other loop prologue ; instructions. 
define i8 @f1(i8 *%src, i8 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK: sr [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: sr {{%r[0-9]+}}, %r3 @@ -47,7 +47,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; Check the minimum signed value. We add 0x80000000 to the rotated word. define i8 @f2(i8 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -56,11 +56,11 @@ define i8 @f2(i8 *%src) { ; CHECK: afi [[ROT]], -2147483648 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -68,7 +68,7 @@ define i8 @f2(i8 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i8 *%src, i8 -128 seq_cst ret i8 %res @@ -76,13 +76,13 @@ define i8 @f2(i8 *%src) { ; Check subtraction of -1. We add 0x01000000 to the rotated word. 
define i8 @f3(i8 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: afi [[ROT]], 16777216 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i8 *%src, i8 -1 seq_cst ret i8 %res @@ -90,13 +90,13 @@ define i8 @f3(i8 *%src) { ; Check subtraction of -1. We add 0xff000000 to the rotated word. define i8 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: afi [[ROT]], -16777216 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i8 *%src, i8 1 seq_cst ret i8 %res @@ -104,13 +104,13 @@ define i8 @f4(i8 *%src) { ; Check the maximum signed value. We add 0x81000000 to the rotated word. define i8 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: afi [[ROT]], -2130706432 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i8 *%src, i8 127 seq_cst ret i8 %res @@ -119,13 +119,13 @@ define i8 @f5(i8 *%src) { ; Check subtraction of a large unsigned value. We add 0x02000000 to the ; rotated word. define i8 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: afi [[ROT]], 33554432 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i8 *%src, i8 254 seq_cst ret i8 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-02.ll b/test/CodeGen/SystemZ/atomicrmw-sub-02.ll index 449d92ff3b304..f82ebd9aaaae7 100644 --- a/test/CodeGen/SystemZ/atomicrmw-sub-02.ll +++ b/test/CodeGen/SystemZ/atomicrmw-sub-02.ll @@ -13,7 +13,7 @@ ; before being used. This shift is independent of the other loop prologue ; instructions. 
define i16 @f1(i16 *%src, i16 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK: sr [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: sr {{%r[0-9]+}}, %r3 @@ -47,7 +47,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; Check the minimum signed value. We add 0x80000000 to the rotated word. define i16 @f2(i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -56,11 +56,11 @@ define i16 @f2(i16 *%src) { ; CHECK: afi [[ROT]], -2147483648 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -68,7 +68,7 @@ define i16 @f2(i16 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i16 *%src, i16 -32768 seq_cst ret i16 %res @@ -76,13 +76,13 @@ define i16 @f2(i16 *%src) { ; Check subtraction of -1. We add 0x00010000 to the rotated word. 
define i16 @f3(i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: afi [[ROT]], 65536 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i16 *%src, i16 -1 seq_cst ret i16 %res @@ -90,13 +90,13 @@ define i16 @f3(i16 *%src) { ; Check subtraction of 1. We add 0xffff0000 to the rotated word. define i16 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: afi [[ROT]], -65536 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i16 *%src, i16 1 seq_cst ret i16 %res @@ -104,13 +104,13 @@ define i16 @f4(i16 *%src) { ; Check the maximum signed value. We add 0x80010000 to the rotated word. define i16 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: afi [[ROT]], -2147418112 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i16 *%src, i16 32767 seq_cst ret i16 %res @@ -119,13 +119,13 @@ define i16 @f5(i16 *%src) { ; Check subtraction of a large unsigned value. We add 0x00020000 to the ; rotated word. define i16 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: afi [[ROT]], 131072 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw sub i16 *%src, i16 65534 seq_cst ret i16 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-03.ll b/test/CodeGen/SystemZ/atomicrmw-sub-03.ll index da07fb57ef726..a3031c6806b5f 100644 --- a/test/CodeGen/SystemZ/atomicrmw-sub-03.ll +++ b/test/CodeGen/SystemZ/atomicrmw-sub-03.ll @@ -1,16 +1,16 @@ ; Test 32-bit atomic subtractions. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check subtraction of a variable. define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lr %r0, %r2 ; CHECK: sr %r0, %r4 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw sub i32 *%src, i32 %b seq_cst ret i32 %res @@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; Check subtraction of 1, which can use AHI. define i32 @f2(i32 %dummy, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lr %r0, %r2 ; CHECK: ahi %r0, -1 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw sub i32 *%src, i32 1 seq_cst ret i32 %res @@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) { ; Check the low end of the AHI range. define i32 @f3(i32 %dummy, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ahi %r0, -32768 ; CHECK: br %r14 %res = atomicrmw sub i32 *%src, i32 32768 seq_cst @@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) { ; Check the next value down, which must use AFI. define i32 @f4(i32 %dummy, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: afi %r0, -32769 ; CHECK: br %r14 %res = atomicrmw sub i32 *%src, i32 32769 seq_cst @@ -50,7 +50,7 @@ define i32 @f4(i32 %dummy, i32 *%src) { ; Check the low end of the AFI range. define i32 @f5(i32 %dummy, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: afi %r0, -2147483648 ; CHECK: br %r14 %res = atomicrmw sub i32 *%src, i32 2147483648 seq_cst @@ -59,7 +59,7 @@ define i32 @f5(i32 %dummy, i32 *%src) { ; Check the next value up, which gets treated as a positive operand. 
define i32 @f6(i32 %dummy, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: afi %r0, 2147483647 ; CHECK: br %r14 %res = atomicrmw sub i32 *%src, i32 2147483649 seq_cst @@ -68,7 +68,7 @@ define i32 @f6(i32 %dummy, i32 *%src) { ; Check subtraction of -1, which can use AHI. define i32 @f7(i32 %dummy, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ahi %r0, 1 ; CHECK: br %r14 %res = atomicrmw sub i32 *%src, i32 -1 seq_cst @@ -77,7 +77,7 @@ define i32 @f7(i32 %dummy, i32 *%src) { ; Check the high end of the AHI range. define i32 @f8(i32 %dummy, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: ahi %r0, 32767 ; CHECK: br %r14 %res = atomicrmw sub i32 *%src, i32 -32767 seq_cst @@ -86,7 +86,7 @@ define i32 @f8(i32 %dummy, i32 *%src) { ; Check the next value down, which must use AFI instead. define i32 @f9(i32 %dummy, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: afi %r0, 32768 ; CHECK: br %r14 %res = atomicrmw sub i32 *%src, i32 -32768 seq_cst diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-04.ll b/test/CodeGen/SystemZ/atomicrmw-sub-04.ll index 26f75afe85f4f..911648b6137ea 100644 --- a/test/CodeGen/SystemZ/atomicrmw-sub-04.ll +++ b/test/CodeGen/SystemZ/atomicrmw-sub-04.ll @@ -1,16 +1,16 @@ ; Test 64-bit atomic subtractions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check subtraction of a variable. define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: sgr %r0, %r4 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 %b seq_cst ret i64 %res @@ -18,13 +18,13 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { ; Check subtraction of 1, which can use AGHI. 
define i64 @f2(i64 %dummy, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: aghi %r0, -1 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 1 seq_cst ret i64 %res @@ -32,7 +32,7 @@ define i64 @f2(i64 %dummy, i64 *%src) { ; Check the low end of the AGHI range. define i64 @f3(i64 %dummy, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: aghi %r0, -32768 ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 32768 seq_cst @@ -41,7 +41,7 @@ define i64 @f3(i64 %dummy, i64 *%src) { ; Check the next value up, which must use AGFI. define i64 @f4(i64 %dummy, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r0, -32769 ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 32769 seq_cst @@ -50,7 +50,7 @@ define i64 @f4(i64 %dummy, i64 *%src) { ; Check the low end of the AGFI range. define i64 @f5(i64 %dummy, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r0, -2147483648 ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 2147483648 seq_cst @@ -59,7 +59,7 @@ define i64 @f5(i64 %dummy, i64 *%src) { ; Check the next value up, which must use a register operation. define i64 @f6(i64 %dummy, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sgr ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 2147483649 seq_cst @@ -68,7 +68,7 @@ define i64 @f6(i64 %dummy, i64 *%src) { ; Check subtraction of -1, which can use AGHI. define i64 @f7(i64 %dummy, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: aghi %r0, 1 ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 -1 seq_cst @@ -77,7 +77,7 @@ define i64 @f7(i64 %dummy, i64 *%src) { ; Check the high end of the AGHI range. 
define i64 @f8(i64 %dummy, i64 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: aghi %r0, 32767 ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 -32767 seq_cst @@ -86,7 +86,7 @@ define i64 @f8(i64 %dummy, i64 *%src) { ; Check the next value down, which must use AGFI instead. define i64 @f9(i64 %dummy, i64 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r0, 32768 ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 -32768 seq_cst @@ -95,7 +95,7 @@ define i64 @f9(i64 %dummy, i64 *%src) { ; Check the high end of the AGFI range. define i64 @f10(i64 %dummy, i64 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r0, 2147483647 ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 -2147483647 seq_cst @@ -104,7 +104,7 @@ define i64 @f10(i64 %dummy, i64 *%src) { ; Check the next value down, which must use a register operation. define i64 @f11(i64 %dummy, i64 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: sgr ; CHECK: br %r14 %res = atomicrmw sub i64 *%src, i64 -2147483648 seq_cst diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll index e33597b7297df..52575c6349716 100644 --- a/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll +++ b/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll @@ -11,7 +11,7 @@ ; being used in the RISBG (in contrast to things like atomic addition, ; which shift %r3 left so that %b is at the high end of the word). 
define i8 @f1(i8 *%src, i8 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -20,11 +20,11 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK: risbg [[ROT]], %r3, 32, 39, 24 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT: f1: +; CHECK-SHIFT-LABEL: f1: ; CHECK-SHIFT-NOT: %r3 ; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT-NOT: %r3 @@ -43,12 +43,12 @@ define i8 @f1(i8 *%src, i8 %b) { ; Check exchange with a constant. We should force the constant into ; a register and use the sequence above. define i8 @f2(i8 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhi [[VALUE:%r[0-9]+]], 88 ; CHECK: risbg {{%r[0-9]+}}, [[VALUE]], 32, 39, 24 ; CHECK: br %r14 ; -; CHECK-SHIFT: f2: +; CHECK-SHIFT-LABEL: f2: ; CHECK-SHIFT: br %r14 %res = atomicrmw xchg i8 *%src, i8 88 seq_cst ret i8 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll index 31f802625a320..04be623ada892 100644 --- a/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll +++ b/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll @@ -11,7 +11,7 @@ ; being used in the RISBG (in contrast to things like atomic addition, ; which shift %r3 left so that %b is at the high end of the word). 
define i16 @f1(i16 *%src, i16 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -20,11 +20,11 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK: risbg [[ROT]], %r3, 32, 47, 16 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT: f1: +; CHECK-SHIFT-LABEL: f1: ; CHECK-SHIFT-NOT: %r3 ; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT-NOT: %r3 @@ -43,12 +43,12 @@ define i16 @f1(i16 *%src, i16 %b) { ; Check exchange with a constant. We should force the constant into ; a register and use the sequence above. define i16 @f2(i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhi [[VALUE:%r[0-9]+]], -25536 ; CHECK: risbg {{%r[0-9]+}}, [[VALUE]], 32, 47, 16 ; CHECK: br %r14 ; -; CHECK-SHIFT: f2: +; CHECK-SHIFT-LABEL: f2: ; CHECK-SHIFT: br %r14 %res = atomicrmw xchg i16 *%src, i16 40000 seq_cst ret i16 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll index 37581ab9d602b..a602a02a189e9 100644 --- a/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll +++ b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll @@ -4,11 +4,11 @@ ; Check register exchange. define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: cs %r2, %r4, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw xchg i32 *%src, i32 %b seq_cst ret i32 %res @@ -16,7 +16,7 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; Check the high end of the aligned CS range. 
define i32 @f2(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: l %r2, 4092(%r3) ; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3) ; CHECK: br %r14 @@ -27,7 +27,7 @@ define i32 @f2(i32 %dummy, i32 *%src, i32 %b) { ; Check the next word up, which requires CSY. define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ly %r2, 4096(%r3) ; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3) ; CHECK: br %r14 @@ -38,7 +38,7 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { ; Check the high end of the aligned CSY range. define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ly %r2, 524284(%r3) ; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3) ; CHECK: br %r14 @@ -49,7 +49,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { ; Check the next word up, which needs separate address logic. define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r3, 524288 ; CHECK: l %r2, 0(%r3) ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) @@ -61,7 +61,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { ; Check the high end of the negative aligned CSY range. define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ly %r2, -4(%r3) ; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3) ; CHECK: br %r14 @@ -72,7 +72,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { ; Check the low end of the CSY range. define i32 @f7(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ly %r2, -524288(%r3) ; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3) ; CHECK: br %r14 @@ -83,7 +83,7 @@ define i32 @f7(i32 %dummy, i32 *%src, i32 %b) { ; Check the next word down, which needs separate address logic. 
define i32 @f8(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r3, -524292 ; CHECK: l %r2, 0(%r3) ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) @@ -95,7 +95,7 @@ define i32 @f8(i32 %dummy, i32 *%src, i32 %b) { ; Check that indexed addresses are not allowed. define i32 @f9(i32 %dummy, i64 %base, i64 %index, i32 %b) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agr %r3, %r4 ; CHECK: l %r2, 0(%r3) ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) @@ -109,12 +109,12 @@ define i32 @f9(i32 %dummy, i64 %base, i64 %index, i32 %b) { ; Check exchange of a constant. We should force it into a register and ; use the sequence above. define i32 @f10(i32 %dummy, i32 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: llill [[VALUE:%r[0-9+]]], 40000 ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: cs %r2, [[VALUE]], 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw xchg i32 *%src, i32 40000 seq_cst ret i32 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll index a68295ea8b042..80c0eeb7121b4 100644 --- a/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll +++ b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll @@ -4,11 +4,11 @@ ; Check register exchange. define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: csg %r2, %r4, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw xchg i64 *%src, i64 %b seq_cst ret i64 %res @@ -16,7 +16,7 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { ; Check the high end of the aligned CSG range. define i64 @f2(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lg %r2, 524280(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3) ; CHECK: br %r14 @@ -27,7 +27,7 @@ define i64 @f2(i64 %dummy, i64 *%src, i64 %b) { ; Check the next doubleword up, which requires separate address logic. 
define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: agfi %r3, 524288 ; CHECK: lg %r2, 0(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) @@ -39,7 +39,7 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { ; Check the low end of the CSG range. define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lg %r2, -524288(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3) ; CHECK: br %r14 @@ -50,7 +50,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { ; Check the next doubleword down, which requires separate address logic. define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r3, -524296 ; CHECK: lg %r2, 0(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) @@ -62,7 +62,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { ; Check that indexed addresses are not allowed. define i64 @f6(i64 %dummy, i64 %base, i64 %index, i64 %b) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agr %r3, %r4 ; CHECK: lg %r2, 0(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) @@ -76,12 +76,12 @@ define i64 @f6(i64 %dummy, i64 %base, i64 %index, i64 %b) { ; Check exchange of a constant. We should force it into a register and ; use the sequence above. define i64 @f7(i64 %dummy, i64 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llilf [[VALUE:%r[0-9+]]], 3000000000 ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: csg %r2, [[VALUE]], 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw xchg i64 *%ptr, i64 3000000000 seq_cst ret i64 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-01.ll b/test/CodeGen/SystemZ/atomicrmw-xor-01.ll index 13cdf02f486c6..e8fef2d31d2ca 100644 --- a/test/CodeGen/SystemZ/atomicrmw-xor-01.ll +++ b/test/CodeGen/SystemZ/atomicrmw-xor-01.ll @@ -13,7 +13,7 @@ ; before being used. This shift is independent of the other loop prologue ; instructions. 
define i8 @f1(i8 *%src, i8 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK: xr [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: xr {{%r[0-9]+}}, %r3 @@ -47,7 +47,7 @@ define i8 @f1(i8 *%src, i8 %b) { ; Check the minimum signed value. We XOR the rotated word with 0x80000000. define i8 @f2(i8 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -56,11 +56,11 @@ define i8 @f2(i8 *%src) { ; CHECK: xilf [[ROT]], 2147483648 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -68,7 +68,7 @@ define i8 @f2(i8 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i8 *%src, i8 -128 seq_cst ret i8 %res @@ -76,13 +76,13 @@ define i8 @f2(i8 *%src) { ; Check XORs of -1. We XOR the rotated word with 0xff000000. 
define i8 @f3(i8 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: xilf [[ROT]], 4278190080 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i8 *%src, i8 -1 seq_cst ret i8 %res @@ -90,13 +90,13 @@ define i8 @f3(i8 *%src) { ; Check XORs of 1. We XOR the rotated word with 0x01000000. define i8 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: xilf [[ROT]], 16777216 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i8 *%src, i8 1 seq_cst ret i8 %res @@ -104,13 +104,13 @@ define i8 @f4(i8 *%src) { ; Check the maximum signed value. We XOR the rotated word with 0x7f000000. define i8 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: xilf [[ROT]], 2130706432 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i8 *%src, i8 127 seq_cst ret i8 %res @@ -119,13 +119,13 @@ define i8 @f5(i8 *%src) { ; Check XORs of a large unsigned value. We XOR the rotated word with ; 0xfd000000. define i8 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: xilf [[ROT]], 4244635648 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i8 *%src, i8 253 seq_cst ret i8 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-02.ll b/test/CodeGen/SystemZ/atomicrmw-xor-02.ll index 4faa64f8e837b..9405c2ec0c080 100644 --- a/test/CodeGen/SystemZ/atomicrmw-xor-02.ll +++ b/test/CodeGen/SystemZ/atomicrmw-xor-02.ll @@ -13,7 +13,7 @@ ; before being used. This shift is independent of the other loop prologue ; instructions. 
define i16 @f1(i16 *%src, i16 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK: xr [[ROT]], %r3 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f1: +; CHECK-SHIFT1-LABEL: f1: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f1: +; CHECK-SHIFT2-LABEL: f1: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: xr {{%r[0-9]+}}, %r3 @@ -47,7 +47,7 @@ define i16 @f1(i16 *%src, i16 %b) { ; Check the minimum signed value. We XOR the rotated word with 0x80000000. define i16 @f2(i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK: nill %r2, 65532 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) @@ -56,11 +56,11 @@ define i16 @f2(i16 *%src) { ; CHECK: xilf [[ROT]], 2147483648 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) ; CHECK: cs [[OLD]], [[NEW]], 0(%r2) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK: br %r14 ; -; CHECK-SHIFT1: f2: +; CHECK-SHIFT1-LABEL: f2: ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT1: rll @@ -68,7 +68,7 @@ define i16 @f2(i16 *%src) { ; CHECK-SHIFT1: rll ; CHECK-SHIFT1: br %r14 ; -; CHECK-SHIFT2: f2: +; CHECK-SHIFT2-LABEL: f2: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i16 *%src, i16 -32768 seq_cst ret i16 %res @@ -76,13 +76,13 @@ define i16 @f2(i16 *%src) { ; Check XORs of -1. We XOR the rotated word with 0xffff0000. 
define i16 @f3(i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: xilf [[ROT]], 4294901760 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f3: +; CHECK-SHIFT1-LABEL: f3: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f3: +; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i16 *%src, i16 -1 seq_cst ret i16 %res @@ -90,13 +90,13 @@ define i16 @f3(i16 *%src) { ; Check XORs of 1. We XOR the rotated word with 0x00010000. define i16 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: xilf [[ROT]], 65536 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f4: +; CHECK-SHIFT1-LABEL: f4: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f4: +; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i16 *%src, i16 1 seq_cst ret i16 %res @@ -104,13 +104,13 @@ define i16 @f4(i16 *%src) { ; Check the maximum signed value. We XOR the rotated word with 0x7fff0000. define i16 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: xilf [[ROT]], 2147418112 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f5: +; CHECK-SHIFT1-LABEL: f5: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f5: +; CHECK-SHIFT2-LABEL: f5: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i16 *%src, i16 32767 seq_cst ret i16 %res @@ -119,13 +119,13 @@ define i16 @f5(i16 *%src) { ; Check XORs of a large unsigned value. We XOR the rotated word with ; 0xfffd0000. define i16 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: xilf [[ROT]], 4294770688 ; CHECK: br %r14 ; -; CHECK-SHIFT1: f6: +; CHECK-SHIFT1-LABEL: f6: ; CHECK-SHIFT1: br %r14 -; CHECK-SHIFT2: f6: +; CHECK-SHIFT2-LABEL: f6: ; CHECK-SHIFT2: br %r14 %res = atomicrmw xor i16 *%src, i16 65533 seq_cst ret i16 %res diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-03.ll b/test/CodeGen/SystemZ/atomicrmw-xor-03.ll index 23884f888e13b..d719d0bd7140e 100644 --- a/test/CodeGen/SystemZ/atomicrmw-xor-03.ll +++ b/test/CodeGen/SystemZ/atomicrmw-xor-03.ll @@ -1,16 +1,16 @@ ; Test 32-bit atomic XORs. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check XORs of a variable. define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lr %r0, %r2 ; CHECK: xr %r0, %r4 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw xor i32 *%src, i32 %b seq_cst ret i32 %res @@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; Check the lowest useful constant. define i32 @f2(i32 %dummy, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lr %r0, %r2 ; CHECK: xilf %r0, 1 ; CHECK: cs %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw xor i32 *%src, i32 1 seq_cst ret i32 %res @@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) { ; Check an arbitrary constant. define i32 @f3(i32 %dummy, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: xilf %r0, 3000000000 ; CHECK: br %r14 %res = atomicrmw xor i32 *%src, i32 3000000000 seq_cst @@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) { ; Check bitwise negation. define i32 @f4(i32 %dummy, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: xilf %r0, 4294967295 ; CHECK: br %r14 %res = atomicrmw xor i32 *%src, i32 -1 seq_cst diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-04.ll b/test/CodeGen/SystemZ/atomicrmw-xor-04.ll index 21130fb47776e..c17a879f37cd2 100644 --- a/test/CodeGen/SystemZ/atomicrmw-xor-04.ll +++ b/test/CodeGen/SystemZ/atomicrmw-xor-04.ll @@ -1,16 +1,16 @@ ; Test 64-bit atomic XORs. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check XORs of a variable. 
define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: xgr %r0, %r4 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw xor i64 *%src, i64 %b seq_cst ret i64 %res @@ -18,13 +18,13 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { ; Check the lowest useful XILF value. define i64 @f2(i64 %dummy, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: xilf %r0, 1 ; CHECK: csg %r2, %r0, 0(%r3) -; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw xor i64 *%src, i64 1 seq_cst ret i64 %res @@ -32,7 +32,7 @@ define i64 @f2(i64 %dummy, i64 *%src) { ; Check the high end of the XILF range. define i64 @f3(i64 %dummy, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: xilf %r0, 4294967295 ; CHECK: br %r14 %res = atomicrmw xor i64 *%src, i64 4294967295 seq_cst @@ -41,7 +41,7 @@ define i64 @f3(i64 %dummy, i64 *%src) { ; Check the lowest useful XIHF value, which is one greater than above. define i64 @f4(i64 %dummy, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: xihf %r0, 1 ; CHECK: br %r14 %res = atomicrmw xor i64 *%src, i64 4294967296 seq_cst @@ -51,7 +51,7 @@ define i64 @f4(i64 %dummy, i64 *%src) { ; Check the next value up, which must use a register. (We could use ; combinations of XIH* and XIL* instead, but that isn't implemented.) define i64 @f5(i64 %dummy, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: xgr ; CHECK: br %r14 %res = atomicrmw xor i64 *%src, i64 4294967297 seq_cst @@ -60,7 +60,7 @@ define i64 @f5(i64 %dummy, i64 *%src) { ; Check the high end of the XIHF range. 
define i64 @f6(i64 %dummy, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: xihf %r0, 4294967295 ; CHECK: br %r14 %res = atomicrmw xor i64 *%src, i64 -4294967296 seq_cst @@ -69,7 +69,7 @@ define i64 @f6(i64 %dummy, i64 *%src) { ; Check the next value up, which must use a register. define i64 @f7(i64 %dummy, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: xgr ; CHECK: br %r14 %res = atomicrmw xor i64 *%src, i64 -4294967295 seq_cst diff --git a/test/CodeGen/SystemZ/branch-01.ll b/test/CodeGen/SystemZ/branch-01.ll index 8ff91ac38e80d..12ed2d32a801a 100644 --- a/test/CodeGen/SystemZ/branch-01.ll +++ b/test/CodeGen/SystemZ/branch-01.ll @@ -3,10 +3,10 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @f1(i8 *%dest) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: .L[[LABEL:.*]]: ; CHECK: mvi 0(%r2), 1 -; CHECK: j{{g?}} .L[[LABEL]] +; CHECK: j .L[[LABEL]] br label %loop loop: store volatile i8 1, i8 *%dest diff --git a/test/CodeGen/SystemZ/branch-02.ll b/test/CodeGen/SystemZ/branch-02.ll index cde9b568b38c4..38b5d27049d80 100644 --- a/test/CodeGen/SystemZ/branch-02.ll +++ b/test/CodeGen/SystemZ/branch-02.ll @@ -1,14 +1,15 @@ ; Test all condition-code masks that are relevant for signed integer -; comparisons. +; comparisons, in cases where a separate branch is better than COMPARE +; AND BRANCH. 
; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @f1(i32 *%src, i32 %target) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: c %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}e .L[[LABEL]] +; CHECK-NEXT: je .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src @@ -19,11 +20,11 @@ exit: } define void @f2(i32 *%src, i32 %target) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: c %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}lh .L[[LABEL]] +; CHECK-NEXT: jlh .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src @@ -34,11 +35,11 @@ exit: } define void @f3(i32 *%src, i32 %target) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: c %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}le .L[[LABEL]] +; CHECK-NEXT: jle .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src @@ -49,11 +50,11 @@ exit: } define void @f4(i32 *%src, i32 %target) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: c %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}l .L[[LABEL]] +; CHECK-NEXT: jl .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src @@ -64,11 +65,11 @@ exit: } define void @f5(i32 *%src, i32 %target) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: c %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}h .L[[LABEL]] +; CHECK-NEXT: jh .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src @@ -79,11 +80,11 @@ exit: } define void @f6(i32 *%src, i32 %target) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: c %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}he .L[[LABEL]] +; CHECK-NEXT: jhe .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src diff --git a/test/CodeGen/SystemZ/branch-03.ll b/test/CodeGen/SystemZ/branch-03.ll index 1e447d034a396..ef31a9c696eab 100644 --- a/test/CodeGen/SystemZ/branch-03.ll +++ 
b/test/CodeGen/SystemZ/branch-03.ll @@ -3,11 +3,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @f1(i32 *%src, i32 %target) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: cl %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}le .L[[LABEL]] +; CHECK-NEXT: jle .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src @@ -18,11 +18,11 @@ exit: } define void @f2(i32 *%src, i32 %target) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: cl %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}l .L[[LABEL]] +; CHECK-NEXT: jl .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src @@ -33,11 +33,11 @@ exit: } define void @f3(i32 *%src, i32 %target) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: cl %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}h .L[[LABEL]] +; CHECK-NEXT: jh .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src @@ -48,11 +48,11 @@ exit: } define void @f4(i32 *%src, i32 %target) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: cl %r3, 0(%r2) -; CHECK-NEXT: j{{g?}}he .L[[LABEL]] +; CHECK-NEXT: jhe .L[[LABEL]] br label %loop loop: %val = load volatile i32 *%src diff --git a/test/CodeGen/SystemZ/branch-04.ll b/test/CodeGen/SystemZ/branch-04.ll index 3d4175041db21..fafb234616f11 100644 --- a/test/CodeGen/SystemZ/branch-04.ll +++ b/test/CodeGen/SystemZ/branch-04.ll @@ -4,11 +4,11 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @f1(float *%src, float %target) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}e .L[[LABEL]] +; CHECK-NEXT: je .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -19,11 +19,11 @@ exit: } define void @f2(float *%src, float %target) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 
0(%r2) -; CHECK-NEXT: j{{g?}}lh .L[[LABEL]] +; CHECK-NEXT: jlh .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -34,11 +34,11 @@ exit: } define void @f3(float *%src, float %target) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}le .L[[LABEL]] +; CHECK-NEXT: jle .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -49,11 +49,11 @@ exit: } define void @f4(float *%src, float %target) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}l .L[[LABEL]] +; CHECK-NEXT: jl .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -64,11 +64,11 @@ exit: } define void @f5(float *%src, float %target) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}h .L[[LABEL]] +; CHECK-NEXT: jh .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -79,11 +79,11 @@ exit: } define void @f6(float *%src, float %target) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}he .L[[LABEL]] +; CHECK-NEXT: jhe .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -94,11 +94,11 @@ exit: } define void @f7(float *%src, float %target) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}nlh .L[[LABEL]] +; CHECK-NEXT: jnlh .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -109,11 +109,11 @@ exit: } define void @f8(float *%src, float %target) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}ne .L[[LABEL]] +; CHECK-NEXT: jne .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -124,11 +124,11 @@ exit: } define void 
@f9(float *%src, float %target) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}nh .L[[LABEL]] +; CHECK-NEXT: jnh .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -139,11 +139,11 @@ exit: } define void @f10(float *%src, float %target) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}nhe .L[[LABEL]] +; CHECK-NEXT: jnhe .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -154,11 +154,11 @@ exit: } define void @f11(float *%src, float %target) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}nle .L[[LABEL]] +; CHECK-NEXT: jnle .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -169,11 +169,11 @@ exit: } define void @f12(float *%src, float %target) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}nl .L[[LABEL]] +; CHECK-NEXT: jnl .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -186,11 +186,11 @@ exit: ; "jno" == "jump if no overflow", which corresponds to "jump if ordered" ; rather than "jump if not ordered" after a floating-point comparison. define void @f13(float *%src, float %target) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}no .L[[LABEL]] +; CHECK-NEXT: jno .L[[LABEL]] br label %loop loop: %val = load volatile float *%src @@ -203,11 +203,11 @@ exit: ; "jo" == "jump if overflow", which corresponds to "jump if not ordered" ; rather than "jump if ordered" after a floating-point comparison. 
define void @f14(float *%src, float %target) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: .cfi_startproc ; CHECK: .L[[LABEL:.*]]: ; CHECK: ceb %f0, 0(%r2) -; CHECK-NEXT: j{{g?}}o .L[[LABEL]] +; CHECK-NEXT: jo .L[[LABEL]] br label %loop loop: %val = load volatile float *%src diff --git a/test/CodeGen/SystemZ/branch-05.ll b/test/CodeGen/SystemZ/branch-05.ll index d149e0b7013b0..b2157b5ac7781 100644 --- a/test/CodeGen/SystemZ/branch-05.ll +++ b/test/CodeGen/SystemZ/branch-05.ll @@ -3,10 +3,9 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define i32 @f1(i32 %x, i32 %y, i32 %op) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ahi %r4, -1 -; CHECK: clfi %r4, 5 -; CHECK-NEXT: j{{g?}}g +; CHECK: clijh %r4, 5, ; CHECK: llgfr [[OP64:%r[0-5]]], %r4 ; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3 ; CHECK: larl [[BASE:%r[1-5]]] diff --git a/test/CodeGen/SystemZ/branch-06.ll b/test/CodeGen/SystemZ/branch-06.ll new file mode 100644 index 0000000000000..2fa23b744afb3 --- /dev/null +++ b/test/CodeGen/SystemZ/branch-06.ll @@ -0,0 +1,190 @@ +; Test all condition-code masks that are relevant for CRJ. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() +@g1 = global i16 0 + +define void @f1(i32 %target) { +; CHECK-LABEL: f1: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: crje %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp eq i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f2(i32 %target) { +; CHECK-LABEL: f2: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: crjlh %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp ne i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f3(i32 %target) { +; CHECK-LABEL: f3: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: crjle %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp sle i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f4(i32 %target) { +; CHECK-LABEL: f4: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: crjl %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp slt i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f5(i32 %target) { +; CHECK-LABEL: f5: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: crjh %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp sgt i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f6(i32 %target) { +; CHECK-LABEL: f6: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: crjhe %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp sge i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; Check that CRJ is used for checking equality with a zero-extending +; 
character load. +define void @f7(i8 *%targetptr) { +; CHECK-LABEL: f7: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: llc [[REG:%r[0-5]]], +; CHECK: crje %r2, [[REG]], .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %byte = load i8 *%targetptr + %target = zext i8 %byte to i32 + %cond = icmp eq i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; ...and zero-extending i16 loads. +define void @f8(i16 *%targetptr) { +; CHECK-LABEL: f8: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: llh [[REG:%r[0-5]]], +; CHECK: crje %r2, [[REG]], .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %half = load i16 *%targetptr + %target = zext i16 %half to i32 + %cond = icmp eq i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; ...unless the address is a global. +define void @f9(i16 *%targetptr) { +; CHECK-LABEL: f9: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clhrl %r2, g1 +; CHECK: je .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %half = load i16 *@g1 + %target = zext i16 %half to i32 + %cond = icmp eq i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; Check that CRJ is used for checking order between two zero-extending +; byte loads, even if the original comparison was unsigned. +define void @f10(i8 *%targetptr1) { +; CHECK-LABEL: f10: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK-DAG: llc [[REG1:%r[0-5]]], 0( +; CHECK-DAG: llc [[REG2:%r[0-5]]], 1( +; CHECK: crjl [[REG1]], [[REG2]], .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %targetptr2 = getelementptr i8 *%targetptr1, i64 1 + %byte1 = load i8 *%targetptr1 + %byte2 = load i8 *%targetptr2 + %ext1 = zext i8 %byte1 to i32 + %ext2 = zext i8 %byte2 to i32 + %cond = icmp ult i32 %ext1, %ext2 + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; ...likewise halfword loads. 
+define void @f11(i16 *%targetptr1) { +; CHECK-LABEL: f11: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK-DAG: llh [[REG1:%r[0-5]]], 0( +; CHECK-DAG: llh [[REG2:%r[0-5]]], 2( +; CHECK: crjl [[REG1]], [[REG2]], .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %targetptr2 = getelementptr i16 *%targetptr1, i64 1 + %half1 = load i16 *%targetptr1 + %half2 = load i16 *%targetptr2 + %ext1 = zext i16 %half1 to i32 + %ext2 = zext i16 %half2 to i32 + %cond = icmp ult i32 %ext1, %ext2 + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/branch-07.ll b/test/CodeGen/SystemZ/branch-07.ll new file mode 100644 index 0000000000000..bac607133a89d --- /dev/null +++ b/test/CodeGen/SystemZ/branch-07.ll @@ -0,0 +1,157 @@ +; Test all condition-code masks that are relevant for CGRJ. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Test EQ. +define void @f1(i64 %target) { +; CHECK-LABEL: f1: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: cgrje %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp eq i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; Test NE. +define void @f2(i64 %target) { +; CHECK-LABEL: f2: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: cgrjlh %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp ne i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; Test SLE. +define void @f3(i64 %target) { +; CHECK-LABEL: f3: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: cgrjle %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp sle i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; Test SLT. 
+define void @f4(i64 %target) { +; CHECK-LABEL: f4: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: cgrjl %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp slt i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; Test SGT. +define void @f5(i64 %target) { +; CHECK-LABEL: f5: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: cgrjh %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp sgt i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; Test SGE. +define void @f6(i64 %target) { +; CHECK-LABEL: f6: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: cgrjhe %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp sge i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; Test a vector of 0/-1 results for i32 EQ. +define i64 @f7(i64 %a, i64 %b) { +; CHECK-LABEL: f7: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -268435456 +; CHECK: sra [[REG]], 31 +; CHECK: br %r14 + %avec = bitcast i64 %a to <2 x i32> + %bvec = bitcast i64 %b to <2 x i32> + %cmp = icmp eq <2 x i32> %avec, %bvec + %ext = sext <2 x i1> %cmp to <2 x i32> + %ret = bitcast <2 x i32> %ext to i64 + ret i64 %ret +} + +; Test a vector of 0/-1 results for i32 NE. +define i64 @f8(i64 %a, i64 %b) { +; CHECK-LABEL: f8: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1879048192 +; CHECK: sra [[REG]], 31 +; CHECK: br %r14 + %avec = bitcast i64 %a to <2 x i32> + %bvec = bitcast i64 %b to <2 x i32> + %cmp = icmp ne <2 x i32> %avec, %bvec + %ext = sext <2 x i1> %cmp to <2 x i32> + %ret = bitcast <2 x i32> %ext to i64 + ret i64 %ret +} + +; Test a vector of 0/-1 results for i64 EQ. 
+define void @f9(i64 %a, i64 %b, <2 x i64> *%dest) { +; CHECK-LABEL: f9: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -268435456 +; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32 +; CHECK: srag {{%r[0-5]}}, [[REG2]], 63 +; CHECK: br %r14 + %avec = bitcast i64 %a to <2 x i32> + %bvec = bitcast i64 %b to <2 x i32> + %cmp = icmp eq <2 x i32> %avec, %bvec + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, <2 x i64> *%dest + ret void +} + +; Test a vector of 0/-1 results for i64 NE. +define void @f10(i64 %a, i64 %b, <2 x i64> *%dest) { +; CHECK-LABEL: f10: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1879048192 +; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32 +; CHECK: srag {{%r[0-5]}}, [[REG2]], 63 +; CHECK: br %r14 + %avec = bitcast i64 %a to <2 x i32> + %bvec = bitcast i64 %b to <2 x i32> + %cmp = icmp ne <2 x i32> %avec, %bvec + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, <2 x i64> *%dest + ret void +} diff --git a/test/CodeGen/SystemZ/branch-08.ll b/test/CodeGen/SystemZ/branch-08.ll new file mode 100644 index 0000000000000..6741d29aec034 --- /dev/null +++ b/test/CodeGen/SystemZ/branch-08.ll @@ -0,0 +1,46 @@ +; Test SystemZInstrInfo::AnalyzeBranch and SystemZInstrInfo::InsertBranch. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo() noreturn + +; Check a case where a separate branch is needed and where the original +; order should be reversed. +define i32 @f1(i32 %a, i32 *%bptr) { +; CHECK-LABEL: f1: +; CHECK: cl %r2, 0(%r3) +; CHECK: jl .L[[LABEL:.*]] +; CHECK: br %r14 +; CHECK: .L[[LABEL]]: +; CHECK: brasl %r14, foo@PLT +entry: + %b = load i32 *%bptr + %cmp = icmp ult i32 %a, %b + br i1 %cmp, label %callit, label %return + +callit: + call void @foo() + unreachable + +return: + ret i32 1 +} + +; Same again with a fused compare and branch. 
+define i32 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: cije %r2, 0, .L[[LABEL:.*]] +; CHECK: br %r14 +; CHECK: .L[[LABEL]]: +; CHECK: brasl %r14, foo@PLT +entry: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %callit, label %return + +callit: + call void @foo() + unreachable + +return: + ret i32 1 +} diff --git a/test/CodeGen/SystemZ/branch-09.ll b/test/CodeGen/SystemZ/branch-09.ll new file mode 100644 index 0000000000000..5591f5bede6ad --- /dev/null +++ b/test/CodeGen/SystemZ/branch-09.ll @@ -0,0 +1,62 @@ +; Test all condition-code masks that are relevant for CLRJ. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() +@g1 = global i16 0 + +define void @f1(i32 %target) { +; CHECK-LABEL: f1: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clrjle %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp ule i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f2(i32 %target) { +; CHECK-LABEL: f2: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clrjl %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp ult i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f3(i32 %target) { +; CHECK-LABEL: f3: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clrjh %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp ugt i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f4(i32 %target) { +; CHECK-LABEL: f4: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clrjhe %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp uge i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/branch-10.ll b/test/CodeGen/SystemZ/branch-10.ll new file mode 100644 
index 0000000000000..ec6e759e8e747 --- /dev/null +++ b/test/CodeGen/SystemZ/branch-10.ll @@ -0,0 +1,62 @@ +; Test all condition-code masks that are relevant for CLGRJ. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() +@g1 = global i16 0 + +define void @f1(i64 %target) { +; CHECK-LABEL: f1: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clgrjle %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp ule i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f2(i64 %target) { +; CHECK-LABEL: f2: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clgrjl %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp ult i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f3(i64 %target) { +; CHECK-LABEL: f3: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clgrjh %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp ugt i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f4(i64 %target) { +; CHECK-LABEL: f4: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clgrjhe %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp uge i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/bswap-01.ll b/test/CodeGen/SystemZ/bswap-01.ll index 952903df50f9d..7e6c83af3f817 100644 --- a/test/CodeGen/SystemZ/bswap-01.ll +++ b/test/CodeGen/SystemZ/bswap-01.ll @@ -7,18 +7,18 @@ declare i64 @llvm.bswap.i64(i64 %a) ; Check 32-bit register-to-register byteswaps. 
define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lrvr [[REGISTER:%r[0-5]]], %r2 -; CHECk: br %r14 +; CHECK: br %r14 %swapped = call i32 @llvm.bswap.i32(i32 %a) ret i32 %swapped } ; Check 64-bit register-to-register byteswaps. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lrvgr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %swapped = call i64 @llvm.bswap.i64(i64 %a) ret i64 %swapped } diff --git a/test/CodeGen/SystemZ/bswap-02.ll b/test/CodeGen/SystemZ/bswap-02.ll index e9b7eb5f055b0..db69ea53dfe1a 100644 --- a/test/CodeGen/SystemZ/bswap-02.ll +++ b/test/CodeGen/SystemZ/bswap-02.ll @@ -6,7 +6,7 @@ declare i32 @llvm.bswap.i32(i32 %a) ; Check LRV with no displacement. define i32 @f1(i32 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lrv %r2, 0(%r2) ; CHECK: br %r14 %a = load i32 *%src @@ -16,7 +16,7 @@ define i32 @f1(i32 *%src) { ; Check the high end of the aligned LRV range. define i32 @f2(i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lrv %r2, 524284(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -28,7 +28,7 @@ define i32 @f2(i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f3(i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: agfi %r2, 524288 ; CHECK: lrv %r2, 0(%r2) ; CHECK: br %r14 @@ -40,7 +40,7 @@ define i32 @f3(i32 *%src) { ; Check the high end of the negative aligned LRV range. define i32 @f4(i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lrv %r2, -4(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -51,7 +51,7 @@ define i32 @f4(i32 *%src) { ; Check the low end of the LRV range. define i32 @f5(i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lrv %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -63,7 +63,7 @@ define i32 @f5(i32 *%src) { ; Check the next word down, which needs separate address logic. 
; Other sequences besides this one would be OK. define i32 @f6(i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, -524292 ; CHECK: lrv %r2, 0(%r2) ; CHECK: br %r14 @@ -75,7 +75,7 @@ define i32 @f6(i32 *%src) { ; Check that LRV allows an index. define i32 @f7(i64 %src, i64 %index) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lrv %r2, 524287({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -85,3 +85,92 @@ define i32 @f7(i64 %src, i64 %index) { %swapped = call i32 @llvm.bswap.i32(i32 %a) ret i32 %swapped } + +; Check that volatile accesses do not use LRV, which might access the +; storage multple times. +define i32 @f8(i32 *%src) { +; CHECK-LABEL: f8: +; CHECK: l [[REG:%r[0-5]]], 0(%r2) +; CHECK: lrvr %r2, [[REG]] +; CHECK: br %r14 + %a = load volatile i32 *%src + %swapped = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %swapped +} + +; Test a case where we spill the source of at least one LRVR. We want +; to use LRV if possible. +define void @f9(i32 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: lrv {{%r[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + + %swapped0 = call i32 @llvm.bswap.i32(i32 %val0) + %swapped1 = call i32 @llvm.bswap.i32(i32 %val1) + %swapped2 = call i32 @llvm.bswap.i32(i32 %val2) + %swapped3 = call i32 @llvm.bswap.i32(i32 %val3) + %swapped4 = call i32 @llvm.bswap.i32(i32 %val4) + %swapped5 = call i32 @llvm.bswap.i32(i32 %val5) + %swapped6 = call i32 @llvm.bswap.i32(i32 %val6) + 
%swapped7 = call i32 @llvm.bswap.i32(i32 %val7) + %swapped8 = call i32 @llvm.bswap.i32(i32 %val8) + %swapped9 = call i32 @llvm.bswap.i32(i32 %val9) + %swapped10 = call i32 @llvm.bswap.i32(i32 %val10) + %swapped11 = call i32 @llvm.bswap.i32(i32 %val11) + %swapped12 = call i32 @llvm.bswap.i32(i32 %val12) + %swapped13 = call i32 @llvm.bswap.i32(i32 %val13) + %swapped14 = call i32 @llvm.bswap.i32(i32 %val14) + %swapped15 = call i32 @llvm.bswap.i32(i32 %val15) + + store volatile i32 %val0, i32 *%ptr + store volatile i32 %val1, i32 *%ptr + store volatile i32 %val2, i32 *%ptr + store volatile i32 %val3, i32 *%ptr + store volatile i32 %val4, i32 *%ptr + store volatile i32 %val5, i32 *%ptr + store volatile i32 %val6, i32 *%ptr + store volatile i32 %val7, i32 *%ptr + store volatile i32 %val8, i32 *%ptr + store volatile i32 %val9, i32 *%ptr + store volatile i32 %val10, i32 *%ptr + store volatile i32 %val11, i32 *%ptr + store volatile i32 %val12, i32 *%ptr + store volatile i32 %val13, i32 *%ptr + store volatile i32 %val14, i32 *%ptr + store volatile i32 %val15, i32 *%ptr + + store volatile i32 %swapped0, i32 *%ptr + store volatile i32 %swapped1, i32 *%ptr + store volatile i32 %swapped2, i32 *%ptr + store volatile i32 %swapped3, i32 *%ptr + store volatile i32 %swapped4, i32 *%ptr + store volatile i32 %swapped5, i32 *%ptr + store volatile i32 %swapped6, i32 *%ptr + store volatile i32 %swapped7, i32 *%ptr + store volatile i32 %swapped8, i32 *%ptr + store volatile i32 %swapped9, i32 *%ptr + store volatile i32 %swapped10, i32 *%ptr + store volatile i32 %swapped11, i32 *%ptr + store volatile i32 %swapped12, i32 *%ptr + store volatile i32 %swapped13, i32 *%ptr + store volatile i32 %swapped14, i32 *%ptr + store volatile i32 %swapped15, i32 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/bswap-03.ll b/test/CodeGen/SystemZ/bswap-03.ll index 2e6bcdce26515..d9e5ad1b52f6e 100644 --- a/test/CodeGen/SystemZ/bswap-03.ll +++ b/test/CodeGen/SystemZ/bswap-03.ll @@ -6,7 +6,7 @@ declare 
i64 @llvm.bswap.i64(i64 %a) ; Check LRVG with no displacement. define i64 @f1(i64 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lrvg %r2, 0(%r2) ; CHECK: br %r14 %a = load i64 *%src @@ -16,7 +16,7 @@ define i64 @f1(i64 *%src) { ; Check the high end of the aligned LRVG range. define i64 @f2(i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lrvg %r2, 524280(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -28,7 +28,7 @@ define i64 @f2(i64 *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f3(i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: agfi %r2, 524288 ; CHECK: lrvg %r2, 0(%r2) ; CHECK: br %r14 @@ -40,7 +40,7 @@ define i64 @f3(i64 *%src) { ; Check the high end of the negative aligned LRVG range. define i64 @f4(i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lrvg %r2, -8(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -51,7 +51,7 @@ define i64 @f4(i64 *%src) { ; Check the low end of the LRVG range. define i64 @f5(i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lrvg %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -63,7 +63,7 @@ define i64 @f5(i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f6(i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, -524296 ; CHECK: lrvg %r2, 0(%r2) ; CHECK: br %r14 @@ -75,7 +75,7 @@ define i64 @f6(i64 *%src) { ; Check that LRVG allows an index. define i64 @f7(i64 %src, i64 %index) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lrvg %r2, 524287({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -85,3 +85,92 @@ define i64 @f7(i64 %src, i64 %index) { %swapped = call i64 @llvm.bswap.i64(i64 %a) ret i64 %swapped } + +; Check that volatile accesses do not use LRVG, which might access the +; storage multple times. 
+define i64 @f8(i64 *%src) { +; CHECK-LABEL: f8: +; CHECK: lg [[REG:%r[0-5]]], 0(%r2) +; CHECK: lrvgr %r2, [[REG]] +; CHECK: br %r14 + %a = load volatile i64 *%src + %swapped = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %swapped +} + +; Test a case where we spill the source of at least one LRVGR. We want +; to use LRVG if possible. +define void @f9(i64 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: lrvg {{%r[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %val0 = load volatile i64 *%ptr + %val1 = load volatile i64 *%ptr + %val2 = load volatile i64 *%ptr + %val3 = load volatile i64 *%ptr + %val4 = load volatile i64 *%ptr + %val5 = load volatile i64 *%ptr + %val6 = load volatile i64 *%ptr + %val7 = load volatile i64 *%ptr + %val8 = load volatile i64 *%ptr + %val9 = load volatile i64 *%ptr + %val10 = load volatile i64 *%ptr + %val11 = load volatile i64 *%ptr + %val12 = load volatile i64 *%ptr + %val13 = load volatile i64 *%ptr + %val14 = load volatile i64 *%ptr + %val15 = load volatile i64 *%ptr + + %swapped0 = call i64 @llvm.bswap.i64(i64 %val0) + %swapped1 = call i64 @llvm.bswap.i64(i64 %val1) + %swapped2 = call i64 @llvm.bswap.i64(i64 %val2) + %swapped3 = call i64 @llvm.bswap.i64(i64 %val3) + %swapped4 = call i64 @llvm.bswap.i64(i64 %val4) + %swapped5 = call i64 @llvm.bswap.i64(i64 %val5) + %swapped6 = call i64 @llvm.bswap.i64(i64 %val6) + %swapped7 = call i64 @llvm.bswap.i64(i64 %val7) + %swapped8 = call i64 @llvm.bswap.i64(i64 %val8) + %swapped9 = call i64 @llvm.bswap.i64(i64 %val9) + %swapped10 = call i64 @llvm.bswap.i64(i64 %val10) + %swapped11 = call i64 @llvm.bswap.i64(i64 %val11) + %swapped12 = call i64 @llvm.bswap.i64(i64 %val12) + %swapped13 = call i64 @llvm.bswap.i64(i64 %val13) + %swapped14 = call i64 @llvm.bswap.i64(i64 %val14) + %swapped15 = call i64 @llvm.bswap.i64(i64 %val15) + + store volatile i64 %val0, i64 *%ptr + store volatile i64 %val1, i64 *%ptr + store volatile i64 %val2, i64 *%ptr + store volatile i64 %val3, i64 *%ptr + store volatile i64 %val4, i64 *%ptr + 
store volatile i64 %val5, i64 *%ptr + store volatile i64 %val6, i64 *%ptr + store volatile i64 %val7, i64 *%ptr + store volatile i64 %val8, i64 *%ptr + store volatile i64 %val9, i64 *%ptr + store volatile i64 %val10, i64 *%ptr + store volatile i64 %val11, i64 *%ptr + store volatile i64 %val12, i64 *%ptr + store volatile i64 %val13, i64 *%ptr + store volatile i64 %val14, i64 *%ptr + store volatile i64 %val15, i64 *%ptr + + store volatile i64 %swapped0, i64 *%ptr + store volatile i64 %swapped1, i64 *%ptr + store volatile i64 %swapped2, i64 *%ptr + store volatile i64 %swapped3, i64 *%ptr + store volatile i64 %swapped4, i64 *%ptr + store volatile i64 %swapped5, i64 *%ptr + store volatile i64 %swapped6, i64 *%ptr + store volatile i64 %swapped7, i64 *%ptr + store volatile i64 %swapped8, i64 *%ptr + store volatile i64 %swapped9, i64 *%ptr + store volatile i64 %swapped10, i64 *%ptr + store volatile i64 %swapped11, i64 *%ptr + store volatile i64 %swapped12, i64 *%ptr + store volatile i64 %swapped13, i64 *%ptr + store volatile i64 %swapped14, i64 *%ptr + store volatile i64 %swapped15, i64 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/bswap-04.ll b/test/CodeGen/SystemZ/bswap-04.ll index 192327bd256c1..29d5a7b072127 100644 --- a/test/CodeGen/SystemZ/bswap-04.ll +++ b/test/CodeGen/SystemZ/bswap-04.ll @@ -5,21 +5,21 @@ declare i32 @llvm.bswap.i32(i32 %a) ; Check STRV with no displacement. -define void @f1(i32 *%src, i32 %a) { -; CHECK: f1: +define void @f1(i32 *%dst, i32 %a) { +; CHECK-LABEL: f1: ; CHECK: strv %r3, 0(%r2) ; CHECK: br %r14 %swapped = call i32 @llvm.bswap.i32(i32 %a) - store i32 %swapped, i32 *%src + store i32 %swapped, i32 *%dst ret void } ; Check the high end of the aligned STRV range. 
-define void @f2(i32 *%src, i32 %a) { -; CHECK: f2: +define void @f2(i32 *%dst, i32 %a) { +; CHECK-LABEL: f2: ; CHECK: strv %r3, 524284(%r2) ; CHECK: br %r14 - %ptr = getelementptr i32 *%src, i64 131071 + %ptr = getelementptr i32 *%dst, i64 131071 %swapped = call i32 @llvm.bswap.i32(i32 %a) store i32 %swapped, i32 *%ptr ret void @@ -27,34 +27,34 @@ define void @f2(i32 *%src, i32 %a) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. -define void @f3(i32 *%src, i32 %a) { -; CHECK: f3: +define void @f3(i32 *%dst, i32 %a) { +; CHECK-LABEL: f3: ; CHECK: agfi %r2, 524288 ; CHECK: strv %r3, 0(%r2) ; CHECK: br %r14 - %ptr = getelementptr i32 *%src, i64 131072 + %ptr = getelementptr i32 *%dst, i64 131072 %swapped = call i32 @llvm.bswap.i32(i32 %a) store i32 %swapped, i32 *%ptr ret void } ; Check the high end of the negative aligned STRV range. -define void @f4(i32 *%src, i32 %a) { -; CHECK: f4: +define void @f4(i32 *%dst, i32 %a) { +; CHECK-LABEL: f4: ; CHECK: strv %r3, -4(%r2) ; CHECK: br %r14 - %ptr = getelementptr i32 *%src, i64 -1 + %ptr = getelementptr i32 *%dst, i64 -1 %swapped = call i32 @llvm.bswap.i32(i32 %a) store i32 %swapped, i32 *%ptr ret void } ; Check the low end of the STRV range. -define void @f5(i32 *%src, i32 %a) { -; CHECK: f5: +define void @f5(i32 *%dst, i32 %a) { +; CHECK-LABEL: f5: ; CHECK: strv %r3, -524288(%r2) ; CHECK: br %r14 - %ptr = getelementptr i32 *%src, i64 -131072 + %ptr = getelementptr i32 *%dst, i64 -131072 %swapped = call i32 @llvm.bswap.i32(i32 %a) store i32 %swapped, i32 *%ptr ret void @@ -62,12 +62,12 @@ define void @f5(i32 *%src, i32 %a) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. 
-define void @f6(i32 *%src, i32 %a) { -; CHECK: f6: +define void @f6(i32 *%dst, i32 %a) { +; CHECK-LABEL: f6: ; CHECK: agfi %r2, -524292 ; CHECK: strv %r3, 0(%r2) ; CHECK: br %r14 - %ptr = getelementptr i32 *%src, i64 -131073 + %ptr = getelementptr i32 *%dst, i64 -131073 %swapped = call i32 @llvm.bswap.i32(i32 %a) store i32 %swapped, i32 *%ptr ret void @@ -75,7 +75,7 @@ define void @f6(i32 *%src, i32 %a) { ; Check that STRV allows an index. define void @f7(i64 %src, i64 %index, i32 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: strv %r4, 524287({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -85,3 +85,15 @@ define void @f7(i64 %src, i64 %index, i32 %a) { store i32 %swapped, i32 *%ptr ret void } + +; Check that volatile stores do not use STRV, which might access the +; storage multple times. +define void @f8(i32 *%dst, i32 %a) { +; CHECK-LABEL: f8: +; CHECK: lrvr [[REG:%r[0-5]]], %r3 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %swapped = call i32 @llvm.bswap.i32(i32 %a) + store volatile i32 %swapped, i32 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/bswap-05.ll b/test/CodeGen/SystemZ/bswap-05.ll index e58cb80c39815..5c8361e26cea9 100644 --- a/test/CodeGen/SystemZ/bswap-05.ll +++ b/test/CodeGen/SystemZ/bswap-05.ll @@ -5,21 +5,21 @@ declare i64 @llvm.bswap.i64(i64 %a) ; Check STRVG with no displacement. -define void @f1(i64 *%src, i64 %a) { -; CHECK: f1: +define void @f1(i64 *%dst, i64 %a) { +; CHECK-LABEL: f1: ; CHECK: strvg %r3, 0(%r2) ; CHECK: br %r14 %swapped = call i64 @llvm.bswap.i64(i64 %a) - store i64 %swapped, i64 *%src + store i64 %swapped, i64 *%dst ret void } ; Check the high end of the aligned STRVG range. 
-define void @f2(i64 *%src, i64 %a) { -; CHECK: f2: +define void @f2(i64 *%dst, i64 %a) { +; CHECK-LABEL: f2: ; CHECK: strvg %r3, 524280(%r2) ; CHECK: br %r14 - %ptr = getelementptr i64 *%src, i64 65535 + %ptr = getelementptr i64 *%dst, i64 65535 %swapped = call i64 @llvm.bswap.i64(i64 %a) store i64 %swapped, i64 *%ptr ret void @@ -27,34 +27,34 @@ define void @f2(i64 *%src, i64 %a) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. -define void @f3(i64 *%src, i64 %a) { -; CHECK: f3: +define void @f3(i64 *%dst, i64 %a) { +; CHECK-LABEL: f3: ; CHECK: agfi %r2, 524288 ; CHECK: strvg %r3, 0(%r2) ; CHECK: br %r14 - %ptr = getelementptr i64 *%src, i64 65536 + %ptr = getelementptr i64 *%dst, i64 65536 %swapped = call i64 @llvm.bswap.i64(i64 %a) store i64 %swapped, i64 *%ptr ret void } ; Check the high end of the negative aligned STRVG range. -define void @f4(i64 *%src, i64 %a) { -; CHECK: f4: +define void @f4(i64 *%dst, i64 %a) { +; CHECK-LABEL: f4: ; CHECK: strvg %r3, -8(%r2) ; CHECK: br %r14 - %ptr = getelementptr i64 *%src, i64 -1 + %ptr = getelementptr i64 *%dst, i64 -1 %swapped = call i64 @llvm.bswap.i64(i64 %a) store i64 %swapped, i64 *%ptr ret void } ; Check the low end of the STRVG range. -define void @f5(i64 *%src, i64 %a) { -; CHECK: f5: +define void @f5(i64 *%dst, i64 %a) { +; CHECK-LABEL: f5: ; CHECK: strvg %r3, -524288(%r2) ; CHECK: br %r14 - %ptr = getelementptr i64 *%src, i64 -65536 + %ptr = getelementptr i64 *%dst, i64 -65536 %swapped = call i64 @llvm.bswap.i64(i64 %a) store i64 %swapped, i64 *%ptr ret void @@ -62,12 +62,12 @@ define void @f5(i64 *%src, i64 %a) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. 
-define void @f6(i64 *%src, i64 %a) { -; CHECK: f6: +define void @f6(i64 *%dst, i64 %a) { +; CHECK-LABEL: f6: ; CHECK: agfi %r2, -524296 ; CHECK: strvg %r3, 0(%r2) ; CHECK: br %r14 - %ptr = getelementptr i64 *%src, i64 -65537 + %ptr = getelementptr i64 *%dst, i64 -65537 %swapped = call i64 @llvm.bswap.i64(i64 %a) store i64 %swapped, i64 *%ptr ret void @@ -75,7 +75,7 @@ define void @f6(i64 *%src, i64 %a) { ; Check that STRVG allows an index. define void @f7(i64 %src, i64 %index, i64 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: strvg %r4, 524287({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -85,3 +85,15 @@ define void @f7(i64 %src, i64 %index, i64 %a) { store i64 %swapped, i64 *%ptr ret void } + +; Check that volatile stores do not use STRVG, which might access the +; storage multple times. +define void @f8(i64 *%dst, i64 %a) { +; CHECK-LABEL: f8: +; CHECK: lrvgr [[REG:%r[0-5]]], %r3 +; CHECK: stg [[REG]], 0(%r2) +; CHECK: br %r14 + %swapped = call i64 @llvm.bswap.i64(i64 %a) + store volatile i64 %swapped, i64 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/call-01.ll b/test/CodeGen/SystemZ/call-01.ll index 1b9172bdd8199..42b6afdd98d4b 100644 --- a/test/CodeGen/SystemZ/call-01.ll +++ b/test/CodeGen/SystemZ/call-01.ll @@ -6,7 +6,7 @@ declare i64 @bar() ; We must allocate 160 bytes for the callee and save and restore %r14. define i64 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stmg %r14, %r15, 112(%r15) ; CHECK: aghi %r15, -160 ; CHECK: brasl %r14, bar@PLT diff --git a/test/CodeGen/SystemZ/call-02.ll b/test/CodeGen/SystemZ/call-02.ll index 07dd67bab1b67..5f14d12249f53 100644 --- a/test/CodeGen/SystemZ/call-02.ll +++ b/test/CodeGen/SystemZ/call-02.ll @@ -4,7 +4,7 @@ ; We must allocate 160 bytes for the callee and save and restore %r14. 
define i64 @f1(i64() *%bar) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stmg %r14, %r15, 112(%r15) ; CHECK: aghi %r15, -160 ; CHECK: basr %r14, %r2 diff --git a/test/CodeGen/SystemZ/call-03.ll b/test/CodeGen/SystemZ/call-03.ll new file mode 100644 index 0000000000000..1f314eae58c80 --- /dev/null +++ b/test/CodeGen/SystemZ/call-03.ll @@ -0,0 +1,125 @@ +; Test sibling calls. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @ok(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, + float %f4, double %f6) +declare void @uses_r6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, i64 %r6) +declare void @uses_indirect(fp128 %r2) +declare void @uses_stack(float %f0, float %f2, float %f4, float %f6, + float %stack) +declare i32 @returns_i32() +declare i64 @returns_i64() + +; Check the maximum number of arguments that we can pass and still use +; a sibling call. +define void @f1() { +; CHECK-LABEL: f1: +; CHECK-DAG: lzer %f0 +; CHECK-DAG: lzdr %f2 +; CHECK-DAG: lhi %r2, 1 +; CHECK-DAG: lhi %r3, 2 +; CHECK-DAG: lhi %r4, 3 +; CHECK-DAG: lghi %r5, 4 +; CHECK-DAG: {{ler %f4, %f0|lzer %f4}} +; CHECK-DAG: {{ldr %f6, %f2|lzdr %f6}} +; CHECK: jg ok@PLT + tail call void @ok(i8 1, i16 2, i32 3, i64 4, float 0.0, double 0.0, + float 0.0, double 0.0) + ret void +} + +; Check a call that uses %r6 to pass an argument. At the moment we don't +; use sibling calls in that case. +define void @f2() { +; CHECK-LABEL: f2: +; CHECK: brasl %r14, uses_r6@PLT +; CHECK: br %r14 + tail call void @uses_r6(i8 1, i16 2, i32 3, i64 4, i64 5) + ret void +} + +; Check a call that passes indirect arguments. We can't use sibling +; calls in that case. 
+define void @f3() { +; CHECK-LABEL: f3: +; CHECK: brasl %r14, uses_indirect@PLT +; CHECK: br %r14 + tail call void @uses_indirect(fp128 0xL00000000000000000000000000000000) + ret void +} + +; Check a call that uses direct stack arguments, which again prevents +; sibling calls +define void @f4() { +; CHECK-LABEL: f4: +; CHECK: brasl %r14, uses_stack@PLT +; CHECK: br %r14 + tail call void @uses_stack(float 0.0, float 0.0, float 0.0, float 0.0, + float 0.0) + ret void +} + +; Check an indirect call. In this case the only acceptable choice for +; the target register is %r1. +define void @f5(void(i32, i32, i32, i32) *%foo) { +; CHECK-LABEL: f5: +; CHECK: lgr %r1, %r2 +; CHECK-DAG: lhi %r2, 1 +; CHECK-DAG: lhi %r3, 2 +; CHECK-DAG: lhi %r4, 3 +; CHECK-DAG: lhi %r5, 4 +; CHECK: br %r1 + tail call void %foo(i32 1, i32 2, i32 3, i32 4) + ret void +} + +; Check an indirect call that will be forced into a call-saved GPR +; (which should be %r13, the highest GPR not used for anything else). +define void @f6(void(i32) *%foo) { +; CHECK-LABEL: f6: +; CHECK: stmg %r13, %r15, 104(%r15) +; CHECK: lgr %r13, %r2 +; CHECK: brasl %r14, returns_i32 +; CHECK: lgr %r1, %r13 +; CHECK: lmg %r13, %r15, 264(%r15) +; CHECK: br %r1 + %arg = call i32 @returns_i32() + tail call void %foo(i32 %arg) + ret void +} + +; Test a function that returns a value. +define i64 @f7() { +; CHECK-LABEL: f7: +; CHECK: jg returns_i64@PLT + %res = tail call i64 @returns_i64() + ret i64 %res +} + +; Test a function that returns a value truncated from i64 to i32. +define i32 @f8() { +; CHECK-LABEL: f8: +; CHECK: jg returns_i64@PLT + %res = tail call i64 @returns_i64() + %trunc = trunc i64 %res to i32 + ret i32 %trunc +} + +; Test a function that returns a value truncated from i64 to i7. +define i7 @f9() { +; CHECK-LABEL: f9: +; CHECK: jg returns_i64@PLT + %res = tail call i64 @returns_i64() + %trunc = trunc i64 %res to i7 + ret i7 %trunc +} + +; Test a function that returns a value truncated from i32 to i8. 
+define i8 @f10() { +; CHECK-LABEL: f10: +; CHECK: jg returns_i32@PLT + %res = tail call i32 @returns_i32() + %trunc = trunc i32 %res to i8 + ret i8 %trunc +} diff --git a/test/CodeGen/SystemZ/cmpxchg-01.ll b/test/CodeGen/SystemZ/cmpxchg-01.ll index 477bcb00e9bd4..d5ea977869005 100644 --- a/test/CodeGen/SystemZ/cmpxchg-01.ll +++ b/test/CodeGen/SystemZ/cmpxchg-01.ll @@ -11,24 +11,23 @@ ; being used in the RISBG (in contrast to things like atomic addition, ; which shift %r3 left so that %b is at the high end of the word). define i8 @f1(i8 %dummy, i8 *%src, i8 %cmp, i8 %swap) { -; CHECK-MAIN: f1: +; CHECK-MAIN-LABEL: f1: ; CHECK-MAIN: sllg [[SHIFT:%r[1-9]+]], %r3, 3 ; CHECK-MAIN: nill %r3, 65532 ; CHECK-MAIN: l [[OLD:%r[0-9]+]], 0(%r3) ; CHECK-MAIN: [[LOOP:\.[^ ]*]]: ; CHECK-MAIN: rll %r2, [[OLD]], 8([[SHIFT]]) ; CHECK-MAIN: risbg %r4, %r2, 32, 55, 0 -; CHECK-MAIN: cr %r2, %r4 -; CHECK-MAIN: j{{g?}}lh [[EXIT:\.[^ ]*]] +; CHECK-MAIN: crjlh %r2, %r4, [[EXIT:\.[^ ]*]] ; CHECK-MAIN: risbg %r5, %r2, 32, 55, 0 ; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r5, -8({{%r[1-9]+}}) ; CHECK-MAIN: cs [[OLD]], [[NEW]], 0(%r3) -; CHECK-MAIN: j{{g?}}lh [[LOOP]] +; CHECK-MAIN: jl [[LOOP]] ; CHECK-MAIN: [[EXIT]]: ; CHECK-MAIN-NOT: %r2 ; CHECK-MAIN: br %r14 ; -; CHECK-SHIFT: f1: +; CHECK-SHIFT-LABEL: f1: ; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r3, 3 ; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT: rll @@ -40,13 +39,13 @@ define i8 @f1(i8 %dummy, i8 *%src, i8 %cmp, i8 %swap) { ; Check compare and swap with constants. We should force the constants into ; registers and use the sequence above. 
define i8 @f2(i8 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhi [[CMP:%r[0-9]+]], 42 ; CHECK: risbg [[CMP]], {{%r[0-9]+}}, 32, 55, 0 ; CHECK: risbg ; CHECK: br %r14 ; -; CHECK-SHIFT: f2: +; CHECK-SHIFT-LABEL: f2: ; CHECK-SHIFT: lhi [[SWAP:%r[0-9]+]], 88 ; CHECK-SHIFT: risbg ; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 55, 0 diff --git a/test/CodeGen/SystemZ/cmpxchg-02.ll b/test/CodeGen/SystemZ/cmpxchg-02.ll index cc3452320b3d0..08c79d717c1e8 100644 --- a/test/CodeGen/SystemZ/cmpxchg-02.ll +++ b/test/CodeGen/SystemZ/cmpxchg-02.ll @@ -11,24 +11,23 @@ ; being used in the RISBG (in contrast to things like atomic addition, ; which shift %r3 left so that %b is at the high end of the word). define i16 @f1(i16 %dummy, i16 *%src, i16 %cmp, i16 %swap) { -; CHECK-MAIN: f1: +; CHECK-MAIN-LABEL: f1: ; CHECK-MAIN: sllg [[SHIFT:%r[1-9]+]], %r3, 3 ; CHECK-MAIN: nill %r3, 65532 ; CHECK-MAIN: l [[OLD:%r[0-9]+]], 0(%r3) ; CHECK-MAIN: [[LOOP:\.[^ ]*]]: ; CHECK-MAIN: rll %r2, [[OLD]], 16([[SHIFT]]) ; CHECK-MAIN: risbg %r4, %r2, 32, 47, 0 -; CHECK-MAIN: cr %r2, %r4 -; CHECK-MAIN: j{{g?}}lh [[EXIT:\.[^ ]*]] +; CHECK-MAIN: crjlh %r2, %r4, [[EXIT:\.[^ ]*]] ; CHECK-MAIN: risbg %r5, %r2, 32, 47, 0 ; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r5, -16({{%r[1-9]+}}) ; CHECK-MAIN: cs [[OLD]], [[NEW]], 0(%r3) -; CHECK-MAIN: j{{g?}}lh [[LOOP]] +; CHECK-MAIN: jl [[LOOP]] ; CHECK-MAIN: [[EXIT]]: ; CHECK-MAIN-NOT: %r2 ; CHECK-MAIN: br %r14 ; -; CHECK-SHIFT: f1: +; CHECK-SHIFT-LABEL: f1: ; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r3, 3 ; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] ; CHECK-SHIFT: rll @@ -40,13 +39,13 @@ define i16 @f1(i16 %dummy, i16 *%src, i16 %cmp, i16 %swap) { ; Check compare and swap with constants. We should force the constants into ; registers and use the sequence above. 
define i16 @f2(i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhi [[CMP:%r[0-9]+]], 42 ; CHECK: risbg [[CMP]], {{%r[0-9]+}}, 32, 47, 0 ; CHECK: risbg ; CHECK: br %r14 ; -; CHECK-SHIFT: f2: +; CHECK-SHIFT-LABEL: f2: ; CHECK-SHIFT: lhi [[SWAP:%r[0-9]+]], 88 ; CHECK-SHIFT: risbg ; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 47, 0 diff --git a/test/CodeGen/SystemZ/cmpxchg-03.ll b/test/CodeGen/SystemZ/cmpxchg-03.ll index 45e224eda84ce..3917979ac24cf 100644 --- a/test/CodeGen/SystemZ/cmpxchg-03.ll +++ b/test/CodeGen/SystemZ/cmpxchg-03.ll @@ -4,7 +4,7 @@ ; Check the low end of the CS range. define i32 @f1(i32 %cmp, i32 %swap, i32 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cs %r2, %r3, 0(%r4) ; CHECK: br %r14 %val = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst @@ -13,7 +13,7 @@ define i32 @f1(i32 %cmp, i32 %swap, i32 *%src) { ; Check the high end of the aligned CS range. define i32 @f2(i32 %cmp, i32 %swap, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cs %r2, %r3, 4092(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1023 @@ -23,7 +23,7 @@ define i32 @f2(i32 %cmp, i32 %swap, i32 *%src) { ; Check the next word up, which should use CSY instead of CS. define i32 @f3(i32 %cmp, i32 %swap, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: csy %r2, %r3, 4096(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1024 @@ -33,7 +33,7 @@ define i32 @f3(i32 %cmp, i32 %swap, i32 *%src) { ; Check the high end of the aligned CSY range. define i32 @f4(i32 %cmp, i32 %swap, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: csy %r2, %r3, 524284(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -44,7 +44,7 @@ define i32 @f4(i32 %cmp, i32 %swap, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i32 @f5(i32 %cmp, i32 %swap, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r4, 524288 ; CHECK: cs %r2, %r3, 0(%r4) ; CHECK: br %r14 @@ -55,7 +55,7 @@ define i32 @f5(i32 %cmp, i32 %swap, i32 *%src) { ; Check the high end of the negative aligned CSY range. define i32 @f6(i32 %cmp, i32 %swap, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: csy %r2, %r3, -4(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -65,7 +65,7 @@ define i32 @f6(i32 %cmp, i32 %swap, i32 *%src) { ; Check the low end of the CSY range. define i32 @f7(i32 %cmp, i32 %swap, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: csy %r2, %r3, -524288(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -76,7 +76,7 @@ define i32 @f7(i32 %cmp, i32 %swap, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f8(i32 %cmp, i32 %swap, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r4, -524292 ; CHECK: cs %r2, %r3, 0(%r4) ; CHECK: br %r14 @@ -87,7 +87,7 @@ define i32 @f8(i32 %cmp, i32 %swap, i32 *%src) { ; Check that CS does not allow an index. define i32 @f9(i32 %cmp, i32 %swap, i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agr %r4, %r5 ; CHECK: cs %r2, %r3, 0(%r4) ; CHECK: br %r14 @@ -99,7 +99,7 @@ define i32 @f9(i32 %cmp, i32 %swap, i64 %src, i64 %index) { ; Check that CSY does not allow an index. define i32 @f10(i32 %cmp, i32 %swap, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agr %r4, %r5 ; CHECK: csy %r2, %r3, 4096(%r4) ; CHECK: br %r14 @@ -112,7 +112,7 @@ define i32 @f10(i32 %cmp, i32 %swap, i64 %src, i64 %index) { ; Check that a constant %cmp value is loaded into a register first. 
define i32 @f11(i32 %dummy, i32 %swap, i32 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: lhi %r2, 1001 ; CHECK: cs %r2, %r3, 0(%r4) ; CHECK: br %r14 @@ -122,7 +122,7 @@ define i32 @f11(i32 %dummy, i32 %swap, i32 *%ptr) { ; Check that a constant %swap value is loaded into a register first. define i32 @f12(i32 %cmp, i32 *%ptr) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: lhi [[SWAP:%r[0-9]+]], 1002 ; CHECK: cs %r2, [[SWAP]], 0(%r3) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/cmpxchg-04.ll b/test/CodeGen/SystemZ/cmpxchg-04.ll index f8969ee084492..f58868f04f2dd 100644 --- a/test/CodeGen/SystemZ/cmpxchg-04.ll +++ b/test/CodeGen/SystemZ/cmpxchg-04.ll @@ -4,7 +4,7 @@ ; Check CSG without a displacement. define i64 @f1(i64 %cmp, i64 %swap, i64 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: csg %r2, %r3, 0(%r4) ; CHECK: br %r14 %val = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst @@ -13,7 +13,7 @@ define i64 @f1(i64 %cmp, i64 %swap, i64 *%src) { ; Check the high end of the aligned CSG range. define i64 @f2(i64 %cmp, i64 %swap, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: csg %r2, %r3, 524280(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -24,7 +24,7 @@ define i64 @f2(i64 %cmp, i64 %swap, i64 *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f3(i64 %cmp, i64 %swap, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: agfi %r4, 524288 ; CHECK: csg %r2, %r3, 0(%r4) ; CHECK: br %r14 @@ -35,7 +35,7 @@ define i64 @f3(i64 %cmp, i64 %swap, i64 *%src) { ; Check the high end of the negative aligned CSG range. define i64 @f4(i64 %cmp, i64 %swap, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: csg %r2, %r3, -8(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -45,7 +45,7 @@ define i64 @f4(i64 %cmp, i64 %swap, i64 *%src) { ; Check the low end of the CSG range. 
define i64 @f5(i64 %cmp, i64 %swap, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: csg %r2, %r3, -524288(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -56,7 +56,7 @@ define i64 @f5(i64 %cmp, i64 %swap, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f6(i64 %cmp, i64 %swap, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r4, -524296 ; CHECK: csg %r2, %r3, 0(%r4) ; CHECK: br %r14 @@ -67,7 +67,7 @@ define i64 @f6(i64 %cmp, i64 %swap, i64 *%src) { ; Check that CSG does not allow an index. define i64 @f7(i64 %cmp, i64 %swap, i64 %src, i64 %index) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agr %r4, %r5 ; CHECK: csg %r2, %r3, 0(%r4) ; CHECK: br %r14 @@ -79,7 +79,7 @@ define i64 @f7(i64 %cmp, i64 %swap, i64 %src, i64 %index) { ; Check that a constant %cmp value is loaded into a register first. define i64 @f8(i64 %dummy, i64 %swap, i64 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: lghi %r2, 1001 ; CHECK: csg %r2, %r3, 0(%r4) ; CHECK: br %r14 @@ -89,7 +89,7 @@ define i64 @f8(i64 %dummy, i64 %swap, i64 *%ptr) { ; Check that a constant %swap value is loaded into a register first. define i64 @f9(i64 %cmp, i64 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: lghi [[SWAP:%r[0-9]+]], 1002 ; CHECK: csg %r2, [[SWAP]], 0(%r3) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/cond-load-01.ll b/test/CodeGen/SystemZ/cond-load-01.ll new file mode 100644 index 0000000000000..1030226798d1c --- /dev/null +++ b/test/CodeGen/SystemZ/cond-load-01.ll @@ -0,0 +1,130 @@ +; Test LOC. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo(i32 *) + +; Test the simple case. 
+define i32 @f1(i32 %easy, i32 *%ptr, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK: clfi %r4, 42 +; CHECK: loche %r2, 0(%r3) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %other = load i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + ret i32 %res +} + +; ...and again with the operands swapped. +define i32 @f2(i32 %easy, i32 *%ptr, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK: clfi %r4, 42 +; CHECK: locl %r2, 0(%r3) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %other = load i32 *%ptr + %res = select i1 %cond, i32 %other, i32 %easy + ret i32 %res +} + +; Check the high end of the aligned LOC range. +define i32 @f3(i32 %easy, i32 *%base, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK: clfi %r4, 42 +; CHECK: loche %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %cond = icmp ult i32 %limit, 42 + %other = load i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + ret i32 %res +} + +; Check the next word up. Other sequences besides this one would be OK. +define i32 @f4(i32 %easy, i32 *%base, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: clfi %r4, 42 +; CHECK: loche %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %cond = icmp ult i32 %limit, 42 + %other = load i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + ret i32 %res +} + +; Check the low end of the LOC range. +define i32 @f5(i32 %easy, i32 *%base, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK: clfi %r4, 42 +; CHECK: loche %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %cond = icmp ult i32 %limit, 42 + %other = load i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + ret i32 %res +} + +; Check the next word down, with the same comments as f4. 
+define i32 @f6(i32 %easy, i32 *%base, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524292 +; CHECK: clfi %r4, 42 +; CHECK: loche %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %cond = icmp ult i32 %limit, 42 + %other = load i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + ret i32 %res +} + +; Try a frame index base. +define i32 @f7(i32 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: loche %r2, {{[0-9]+}}(%r15) +; CHECK: br %r14 + %ptr = alloca i32 + %easy = call i32 @foo(i32 *%ptr) + %cond = icmp ult i32 %limit, 42 + %other = load i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + ret i32 %res +} + +; Try a case when an index is involved. +define i32 @f8(i32 %easy, i32 %limit, i64 %base, i64 %index) { +; CHECK-LABEL: f8: +; CHECK: clfi %r3, 42 +; CHECK: loche %r2, 0({{%r[1-5]}}) +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %cond = icmp ult i32 %limit, 42 + %other = load i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + ret i32 %res +} + +; Test that conditionally-executed loads do not use LOC, since it is allowed +; to trap even when the condition is false. +define i32 @f9(i32 %easy, i32 %limit, i32 *%ptr) { +; CHECK-LABEL: f9: +; CHECK-NOT: loc +; CHECK: br %r14 +entry: + %cmp = icmp ule i32 %easy, %limit + br i1 %cmp, label %load, label %exit + +load: + %other = load i32 *%ptr + br label %exit + +exit: + %res = phi i32 [ %easy, %entry ], [ %other, %load ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/cond-load-02.ll b/test/CodeGen/SystemZ/cond-load-02.ll new file mode 100644 index 0000000000000..e97f4728bc0b3 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-load-02.ll @@ -0,0 +1,130 @@ +; Test LOCG. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo(i64 *) + +; Test the simple case. 
+define i64 @f1(i64 %easy, i64 *%ptr, i64 %limit) { +; CHECK-LABEL: f1: +; CHECK: clgfi %r4, 42 +; CHECK: locghe %r2, 0(%r3) +; CHECK: br %r14 + %cond = icmp ult i64 %limit, 42 + %other = load i64 *%ptr + %res = select i1 %cond, i64 %easy, i64 %other + ret i64 %res +} + +; ...and again with the operands swapped. +define i64 @f2(i64 %easy, i64 *%ptr, i64 %limit) { +; CHECK-LABEL: f2: +; CHECK: clgfi %r4, 42 +; CHECK: locgl %r2, 0(%r3) +; CHECK: br %r14 + %cond = icmp ult i64 %limit, 42 + %other = load i64 *%ptr + %res = select i1 %cond, i64 %other, i64 %easy + ret i64 %res +} + +; Check the high end of the aligned LOCG range. +define i64 @f3(i64 %easy, i64 *%base, i64 %limit) { +; CHECK-LABEL: f3: +; CHECK: clgfi %r4, 42 +; CHECK: locghe %r2, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65535 + %cond = icmp ult i64 %limit, 42 + %other = load i64 *%ptr + %res = select i1 %cond, i64 %easy, i64 %other + ret i64 %res +} + +; Check the next doubleword up. Other sequences besides this one would be OK. +define i64 @f4(i64 %easy, i64 *%base, i64 %limit) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: clgfi %r4, 42 +; CHECK: locghe %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65536 + %cond = icmp ult i64 %limit, 42 + %other = load i64 *%ptr + %res = select i1 %cond, i64 %easy, i64 %other + ret i64 %res +} + +; Check the low end of the LOCG range. +define i64 @f5(i64 %easy, i64 *%base, i64 %limit) { +; CHECK-LABEL: f5: +; CHECK: clgfi %r4, 42 +; CHECK: locghe %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65536 + %cond = icmp ult i64 %limit, 42 + %other = load i64 *%ptr + %res = select i1 %cond, i64 %easy, i64 %other + ret i64 %res +} + +; Check the next doubleword down, with the same comments as f4. 
+define i64 @f6(i64 %easy, i64 *%base, i64 %limit) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524296 +; CHECK: clgfi %r4, 42 +; CHECK: locghe %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65537 + %cond = icmp ult i64 %limit, 42 + %other = load i64 *%ptr + %res = select i1 %cond, i64 %easy, i64 %other + ret i64 %res +} + +; Try a frame index base. +define i64 @f7(i64 %alt, i64 %limit) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: locghe %r2, {{[0-9]+}}(%r15) +; CHECK: br %r14 + %ptr = alloca i64 + %easy = call i64 @foo(i64 *%ptr) + %cond = icmp ult i64 %limit, 42 + %other = load i64 *%ptr + %res = select i1 %cond, i64 %easy, i64 %other + ret i64 %res +} + +; Try a case when an index is involved. +define i64 @f8(i64 %easy, i64 %limit, i64 %base, i64 %index) { +; CHECK-LABEL: f8: +; CHECK: clgfi %r3, 42 +; CHECK: locghe %r2, 0({{%r[1-5]}}) +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i64 * + %cond = icmp ult i64 %limit, 42 + %other = load i64 *%ptr + %res = select i1 %cond, i64 %easy, i64 %other + ret i64 %res +} + +; Test that conditionally-executed loads do not use LOCG, since it is allowed +; to trap even when the condition is false. +define i64 @f9(i64 %easy, i64 %limit, i64 *%ptr) { +; CHECK-LABEL: f9: +; CHECK-NOT: locg +; CHECK: br %r14 +entry: + %cmp = icmp ule i64 %easy, %limit + br i1 %cmp, label %load, label %exit + +load: + %other = load i64 *%ptr + br label %exit + +exit: + %res = phi i64 [ %easy, %entry ], [ %other, %load ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/cond-move-01.ll b/test/CodeGen/SystemZ/cond-move-01.ll new file mode 100644 index 0000000000000..088dee0232ea7 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-move-01.ll @@ -0,0 +1,48 @@ +; Test LOCR and LOCGR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test LOCR. 
+define i32 @f1(i32 %a, i32 %b, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK: clfi %r4, 42 +; CHECK: locrhe %r2, %r3 +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +; Test LOCGR. +define i64 @f2(i64 %a, i64 %b, i64 %limit) { +; CHECK-LABEL: f2: +; CHECK: clgfi %r4, 42 +; CHECK: locgrhe %r2, %r3 +; CHECK: br %r14 + %cond = icmp ult i64 %limit, 42 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Test LOCR in a case that could use COMPARE AND BRANCH. We prefer using +; LOCR if possible. +define i32 @f3(i32 %a, i32 %b, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK: chi %r4, 42 +; CHECK: locrlh %r2, %r3 +; CHECK: br %r14 + %cond = icmp eq i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +; ...and again for LOCGR. +define i64 @f4(i64 %a, i64 %b, i64 %limit) { +; CHECK-LABEL: f4: +; CHECK: cghi %r4, 42 +; CHECK: locgrlh %r2, %r3 +; CHECK: br %r14 + %cond = icmp eq i64 %limit, 42 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/cond-store-01.ll b/test/CodeGen/SystemZ/cond-store-01.ll new file mode 100644 index 0000000000000..d55ea2133e8f4 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-01.ll @@ -0,0 +1,398 @@ +; Test 8-bit conditional stores that are presented as selects. The volatile +; tests require z10, which use a branch instead of a LOCR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare void @foo(i8 *) + +; Test the simple case, with the loaded value first. 
+define void @f1(i8 *%ptr, i8 %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i8 *%ptr, i8 %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %alt, i8 %orig + store i8 %res, i8 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 32 bits, with the +; loaded value first. +define void @f3(i8 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %ext = sext i8 %orig to i32 + %res = select i1 %cond, i32 %ext, i32 %alt + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f4(i8 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %ext = sext i8 %orig to i32 + %res = select i1 %cond, i32 %alt, i32 %ext + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 32 bits, with the +; loaded value first. 
+define void @f5(i8 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %ext = zext i8 %orig to i32 + %res = select i1 %cond, i32 %ext, i32 %alt + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f6(i8 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %ext = zext i8 %orig to i32 + %res = select i1 %cond, i32 %alt, i32 %ext + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 64 bits, with the +; loaded value first. +define void @f7(i8 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %ext = sext i8 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f8(i8 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f8: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %ext = sext i8 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 64 bits, with the +; loaded value first. 
+define void @f9(i8 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %ext = zext i8 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f10(i8 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f10: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %ext = zext i8 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Check the high end of the STC range. +define void @f11(i8 *%base, i8 %alt, i32 %limit) { +; CHECK-LABEL: f11: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 4095(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 4095 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the next byte up, which should use STCY instead of STC. +define void @f12(i8 *%base, i8 %alt, i32 %limit) { +; CHECK-LABEL: f12: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stcy %r3, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 4096 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the high end of the STCY range. 
+define void @f13(i8 *%base, i8 %alt, i32 %limit) { +; CHECK-LABEL: f13: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stcy %r3, 524287(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 524287 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f14(i8 *%base, i8 %alt, i32 %limit) { +; CHECK-LABEL: f14: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 524288 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the low end of the STCY range. +define void @f15(i8 *%base, i8 %alt, i32 %limit) { +; CHECK-LABEL: f15: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stcy %r3, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 -524288 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f16(i8 *%base, i8 %alt, i32 %limit) { +; CHECK-LABEL: f16: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524289 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 -524289 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check that STCY allows an index. 
+define void @f17(i64 %base, i64 %index, i8 %alt, i32 %limit) { +; CHECK-LABEL: f17: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stcy %r4, 4096(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i8 * + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f18(i8 *%ptr, i8 %alt, i32 %limit) { +; CHECK-LABEL: f18: +; CHECK: lb {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: stc {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load volatile i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %r3. +define void @f19(i8 *%ptr, i8 %alt, i32 %limit) { +; CHECK-LABEL: f19: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: lb %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store volatile i8 %res, i8 *%ptr + ret void +} + +; Check that atomic loads are not matched. The transformation is OK for +; the "unordered" case tested here, but since we don't try to handle atomic +; operations at all in this context, it seems better to assert that than +; to restrict the test to a stronger ordering. +define void @f20(i8 *%ptr, i8 %alt, i32 %limit) { +; FIXME: should use a normal load instead of CS. 
+; CHECK-LABEL: f20: +; CHECK: cs {{%r[0-9]+}}, +; CHECK: jl +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: stc {{%r[0-9]+}}, +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load atomic i8 *%ptr unordered, align 1 + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; ...likewise stores. +define void @f21(i8 *%ptr, i8 %alt, i32 %limit) { +; FIXME: should use a normal store instead of CS. +; CHECK-LABEL: f21: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: lb %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: cs {{%r[0-9]+}}, +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store atomic i8 %res, i8 *%ptr unordered, align 1 + ret void +} + +; Try a frame index base. +define void @f22(i8 %alt, i32 %limit) { +; CHECK-LABEL: f22: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: stc {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i8 + call void @foo(i8 *%ptr) + %cond = icmp ult i32 %limit, 420 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + call void @foo(i8 *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-02.ll b/test/CodeGen/SystemZ/cond-store-02.ll new file mode 100644 index 0000000000000..91bc4860b3848 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-02.ll @@ -0,0 +1,398 @@ +; Test 16-bit conditional stores that are presented as selects. The volatile +; tests require z10, which use a branch instead of a LOCR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare void @foo(i16 *) + +; Test the simple case, with the loaded value first. 
+define void @f1(i16 *%ptr, i16 %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i16 *%ptr, i16 %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %alt, i16 %orig + store i16 %res, i16 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 32 bits, with the +; loaded value first. +define void @f3(i16 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %ext = sext i16 %orig to i32 + %res = select i1 %cond, i32 %ext, i32 %alt + %trunc = trunc i32 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f4(i16 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %ext = sext i16 %orig to i32 + %res = select i1 %cond, i32 %alt, i32 %ext + %trunc = trunc i32 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 32 bits, with the +; loaded value first. 
+define void @f5(i16 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %ext = zext i16 %orig to i32 + %res = select i1 %cond, i32 %ext, i32 %alt + %trunc = trunc i32 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f6(i16 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %ext = zext i16 %orig to i32 + %res = select i1 %cond, i32 %alt, i32 %ext + %trunc = trunc i32 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 64 bits, with the +; loaded value first. +define void @f7(i16 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %ext = sext i16 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f8(i16 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f8: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %ext = sext i16 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 64 bits, with the +; loaded value first. 
+define void @f9(i16 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %ext = zext i16 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f10(i16 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f10: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %ext = zext i16 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; Check the high end of the aligned STH range. +define void @f11(i16 *%base, i16 %alt, i32 %limit) { +; CHECK-LABEL: f11: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 4094(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 2047 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the next halfword up, which should use STHY instead of STH. +define void @f12(i16 *%base, i16 %alt, i32 %limit) { +; CHECK-LABEL: f12: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sthy %r3, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 2048 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the high end of the aligned STHY range. 
+define void @f13(i16 *%base, i16 %alt, i32 %limit) { +; CHECK-LABEL: f13: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sthy %r3, 524286(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 262143 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f14(i16 *%base, i16 %alt, i32 %limit) { +; CHECK-LABEL: f14: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 262144 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the low end of the STHY range. +define void @f15(i16 *%base, i16 %alt, i32 %limit) { +; CHECK-LABEL: f15: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sthy %r3, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 -262144 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f16(i16 *%base, i16 %alt, i32 %limit) { +; CHECK-LABEL: f16: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524290 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 -262145 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check that STHY allows an index. 
+define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) { +; CHECK-LABEL: f17: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sthy %r4, 4096(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f18(i16 *%ptr, i16 %alt, i32 %limit) { +; CHECK-LABEL: f18: +; CHECK: lh {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: sth {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load volatile i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %r3. +define void @f19(i16 *%ptr, i16 %alt, i32 %limit) { +; CHECK-LABEL: f19: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: lh %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: sth %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store volatile i16 %res, i16 *%ptr + ret void +} + +; Check that atomic loads are not matched. The transformation is OK for +; the "unordered" case tested here, but since we don't try to handle atomic +; operations at all in this context, it seems better to assert that than +; to restrict the test to a stronger ordering. +define void @f20(i16 *%ptr, i16 %alt, i32 %limit) { +; FIXME: should use a normal load instead of CS. 
+; CHECK-LABEL: f20: +; CHECK: cs {{%r[0-9]+}}, +; CHECK: jl +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: sth {{%r[0-9]+}}, +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load atomic i16 *%ptr unordered, align 2 + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; ...likewise stores. +define void @f21(i16 *%ptr, i16 %alt, i32 %limit) { +; FIXME: should use a normal store instead of CS. +; CHECK-LABEL: f21: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: lh %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: cs {{%r[0-9]+}}, +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store atomic i16 %res, i16 *%ptr unordered, align 2 + ret void +} + +; Try a frame index base. +define void @f22(i16 %alt, i32 %limit) { +; CHECK-LABEL: f22: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i16 + call void @foo(i16 *%ptr) + %cond = icmp ult i32 %limit, 420 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + call void @foo(i16 *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-03.ll b/test/CodeGen/SystemZ/cond-store-03.ll new file mode 100644 index 0000000000000..d4fd48d613244 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-03.ll @@ -0,0 +1,322 @@ +; Test 32-bit conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare void @foo(i32 *) + +; Test the simple case, with the loaded value first. 
+define void @f1(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %alt, i32 %orig + store i32 %res, i32 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 64 bits, with the +; loaded value first. +define void @f3(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %ext = sext i32 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f4(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %ext = sext i32 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 32 bits, with the +; loaded value first. 
+define void @f5(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %ext = zext i32 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f6(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %ext = zext i32 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; Check the high end of the aligned ST range. +define void @f7(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 4092(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1023 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word up, which should use STY instead of ST. +define void @f8(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f8: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sty %r3, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1024 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the high end of the aligned STY range. 
+define void @f9(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sty %r3, 524284(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f10(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f10: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the low end of the STY range. +define void @f11(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f11: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sty %r3, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f12(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f12: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524292 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check that STY allows an index. 
+define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) { +; CHECK-LABEL: f13: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sty %r4, 4096(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f14(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f14: +; CHECK: l {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: st {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load volatile i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %r3. +define void @f15(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f15: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: l %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: st %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store volatile i32 %res, i32 *%ptr + ret void +} + +; Check that atomic loads are not matched. The transformation is OK for +; the "unordered" case tested here, but since we don't try to handle atomic +; operations at all in this context, it seems better to assert that than +; to restrict the test to a stronger ordering. +define void @f16(i32 *%ptr, i32 %alt, i32 %limit) { +; FIXME: should use a normal load instead of CS. 
+; CHECK-LABEL: f16: +; CHECK: cs {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: st {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load atomic i32 *%ptr unordered, align 4 + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; ...likewise stores. +define void @f17(i32 *%ptr, i32 %alt, i32 %limit) { +; FIXME: should use a normal store instead of CS. +; CHECK-LABEL: f17: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: l %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store atomic i32 %res, i32 *%ptr unordered, align 4 + ret void +} + +; Try a frame index base. +define void @f18(i32 %alt, i32 %limit) { +; CHECK-LABEL: f18: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: st {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i32 + call void @foo(i32 *%ptr) + %cond = icmp ult i32 %limit, 420 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + call void @foo(i32 *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-04.ll b/test/CodeGen/SystemZ/cond-store-04.ll new file mode 100644 index 0000000000000..fc565c432fff3 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-04.ll @@ -0,0 +1,214 @@ +; Test 64-bit conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare void @foo(i64 *) + +; Test with the loaded value first. 
+define void @f1(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %alt, i64 %orig + store i64 %res, i64 *%ptr + ret void +} + +; Check the high end of the aligned STG range. +define void @f3(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r3, 524280(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65535 + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f4(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: stg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65536 + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the low end of the STG range. 
+define void @f5(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r3, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65536 + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f6(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524296 +; CHECK: stg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65537 + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check that STG allows an index. +define void @f7(i64 %base, i64 %index, i64 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r4, 524287(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i64 * + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f8(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f8: +; CHECK: lg {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: stg {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load volatile i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %r3. 
+define void @f9(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f9: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: lg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: stg %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store volatile i64 %res, i64 *%ptr + ret void +} + +; Check that atomic loads are not matched. The transformation is OK for +; the "unordered" case tested here, but since we don't try to handle atomic +; operations at all in this context, it seems better to assert that than +; to restrict the test to a stronger ordering. +define void @f10(i64 *%ptr, i64 %alt, i32 %limit) { +; FIXME: should use a normal load instead of CSG. +; CHECK-LABEL: f10: +; CHECK: csg {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: stg {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load atomic i64 *%ptr unordered, align 8 + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; ...likewise stores. +define void @f11(i64 *%ptr, i64 %alt, i32 %limit) { +; FIXME: should use a normal store instead of CSG. +; CHECK-LABEL: f11: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: lg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store atomic i64 %res, i64 *%ptr unordered, align 8 + ret void +} + +; Try a frame index base. 
+define void @f12(i64 %alt, i32 %limit) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: stg {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i64 + call void @foo(i64 *%ptr) + %cond = icmp ult i32 %limit, 420 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + call void @foo(i64 *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-05.ll b/test/CodeGen/SystemZ/cond-store-05.ll new file mode 100644 index 0000000000000..f8056f73c9285 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-05.ll @@ -0,0 +1,213 @@ +; Test f32 conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo(float *) + +; Test with the loaded value first. +define void @f1(float *%ptr, float %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: ste %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(float *%ptr, float %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: ste %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %alt, float %orig + store float %res, float *%ptr + ret void +} + +; Check the high end of the aligned STE range. 
+define void @f3(float *%base, float %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: ste %f0, 4092(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the next word up, which should use STEY instead of STE. +define void @f4(float *%base, float %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stey %f0, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the high end of the aligned STEY range. +define void @f5(float *%base, float %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stey %f0, 524284(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 131071 + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f6(float *%base, float %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: ste %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 131072 + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the low end of the STEY range. 
+define void @f7(float *%base, float %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stey %f0, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -131072 + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(float *%base, float %alt, i32 %limit) { +; CHECK-LABEL: f8: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524292 +; CHECK: ste %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -131073 + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check that STEY allows an index. +define void @f9(i64 %base, i64 %index, float %alt, i32 %limit) { +; CHECK-LABEL: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stey %f0, 4096(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to float * + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f10(float *%ptr, float %alt, i32 %limit) { +; CHECK-LABEL: f10: +; CHECK: le {{%f[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: ste {{%f[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load volatile float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; ...likewise stores. 
In this case we should have a conditional load into %f0. +define void @f11(float *%ptr, float %alt, i32 %limit) { +; CHECK-LABEL: f11: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: le %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: ste %f0, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store volatile float %res, float *%ptr + ret void +} + +; Try a frame index base. +define void @f12(float %alt, i32 %limit) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: ste {{%f[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca float + call void @foo(float *%ptr) + %cond = icmp ult i32 %limit, 420 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + call void @foo(float *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-06.ll b/test/CodeGen/SystemZ/cond-store-06.ll new file mode 100644 index 0000000000000..66681958d4746 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-06.ll @@ -0,0 +1,213 @@ +; Test f64 conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo(double *) + +; Test with the loaded value first. 
+define void @f1(double *%ptr, double %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: std %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(double *%ptr, double %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: std %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %alt, double %orig + store double %res, double *%ptr + ret void +} + +; Check the high end of the aligned STD range. +define void @f3(double *%base, double %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: std %f0, 4088(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the next doubleword up, which should use STDY instead of STD. +define void @f4(double *%base, double %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stdy %f0, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the high end of the aligned STDY range. 
+define void @f5(double *%base, double %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stdy %f0, 524280(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 65535 + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f6(double *%base, double %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: std %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 65536 + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the low end of the STDY range. +define void @f7(double *%base, double %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stdy %f0, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -65536 + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f8(double *%base, double %alt, i32 %limit) { +; CHECK-LABEL: f8: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524296 +; CHECK: std %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -65537 + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check that STDY allows an index. +define void @f9(i64 %base, i64 %index, double %alt, i32 %limit) { +; CHECK-LABEL: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stdy %f0, 524287(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to double * + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f10(double *%ptr, double %alt, i32 %limit) { +; CHECK-LABEL: f10: +; CHECK: ld {{%f[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: std {{%f[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load volatile double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %f0. +define void @f11(double *%ptr, double %alt, i32 %limit) { +; CHECK-LABEL: f11: +; CHECK: jhe [[LABEL:[^ ]*]] +; CHECK: ld %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: std %f0, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store volatile double %res, double *%ptr + ret void +} + +; Try a frame index base. 
+define void @f12(double %alt, i32 %limit) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: std {{%f[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca double + call void @foo(double *%ptr) + %cond = icmp ult i32 %limit, 420 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + call void @foo(double *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-07.ll b/test/CodeGen/SystemZ/cond-store-07.ll new file mode 100644 index 0000000000000..b1df525566a37 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-07.ll @@ -0,0 +1,186 @@ +; Test STOCs that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare void @foo(i32 *) + +; Test the simple case, with the loaded value first. +define void @f1(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK: clfi %r4, 42 +; CHECK: stoche %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK: clfi %r4, 42 +; CHECK: stocl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %alt, i32 %orig + store i32 %res, i32 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 64 bits, with the +; loaded value first. 
+define void @f3(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK: clfi %r4, 42 +; CHECK: stoche %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = sext i32 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f4(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK: clfi %r4, 42 +; CHECK: stocl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = sext i32 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 32 bits, with the +; loaded value first. +define void @f5(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK: clfi %r4, 42 +; CHECK: stoche %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = zext i32 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f6(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK: clfi %r4, 42 +; CHECK: stocl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = zext i32 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; Check the high end of the aligned STOC range. 
+define void @f7(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK: clfi %r4, 42 +; CHECK: stoche %r3, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word up. Other sequences besides this one would be OK. +define void @f8(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f8: +; CHECK: agfi %r2, 524288 +; CHECK: clfi %r4, 42 +; CHECK: stoche %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the low end of the STOC range. +define void @f9(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f9: +; CHECK: clfi %r4, 42 +; CHECK: stoche %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word down, with the same comments as f8. +define void @f10(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f10: +; CHECK: agfi %r2, -524292 +; CHECK: clfi %r4, 42 +; CHECK: stoche %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Try a frame index base. 
+define void @f11(i32 %alt, i32 %limit) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: stoche {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i32 + call void @foo(i32 *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + call void @foo(i32 *%ptr) + ret void +} + +; Test that conditionally-executed stores do not use STOC, since STOC +; is allowed to trap even when the condition is false. +define void @f12(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f12: +; CHECK-NOT: stoc +; CHECK: br %r14 +entry: + %cmp = icmp ule i32 %a, %b + br i1 %cmp, label %store, label %exit + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-08.ll b/test/CodeGen/SystemZ/cond-store-08.ll new file mode 100644 index 0000000000000..56dc7ee7777c7 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-08.ll @@ -0,0 +1,124 @@ +; Test STOCGs that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare void @foo(i64 *) + +; Test with the loaded value first. +define void @f1(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK: clfi %r4, 42 +; CHECK: stocghe %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK: clfi %r4, 42 +; CHECK: stocgl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %alt, i64 %orig + store i64 %res, i64 *%ptr + ret void +} + +; Check the high end of the aligned STOCG range. 
+define void @f3(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK: clfi %r4, 42 +; CHECK: stocghe %r3, 524280(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65535 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the next doubleword up. Other sequences besides this one would be OK. +define void @f4(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK: agfi %r2, 524288 +; CHECK: clfi %r4, 42 +; CHECK: stocghe %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65536 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the low end of the STOCG range. +define void @f5(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK: clfi %r4, 42 +; CHECK: stocghe %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65536 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the next doubleword down, with the same comments as f4. +define void @f6(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK: agfi %r2, -524296 +; CHECK: clfi %r4, 42 +; CHECK: stocghe %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65537 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Try a frame index base. 
+define void @f7(i64 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: stocghe {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i64 + call void @foo(i64 *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + call void @foo(i64 *%ptr) + ret void +} + +; Test that conditionally-executed stores do not use STOC, since STOC +; is allowed to trap even when the condition is false. +define void @f8(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f8: +; CHECK-NOT: stocg %r3, 0(%r4) +; CHECK: br %r14 +entry: + %cmp = icmp ule i64 %a, %b + br i1 %cmp, label %store, label %exit + +store: + store i64 %b, i64 *%dest + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/fp-abs-01.ll b/test/CodeGen/SystemZ/fp-abs-01.ll index 81b3fb273d140..0b4067da3d14b 100644 --- a/test/CodeGen/SystemZ/fp-abs-01.ll +++ b/test/CodeGen/SystemZ/fp-abs-01.ll @@ -5,7 +5,7 @@ ; Test f32. declare float @llvm.fabs.f32(float %f) define float @f1(float %f) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lpebr %f0, %f0 ; CHECK: br %r14 %res = call float @llvm.fabs.f32(float %f) @@ -15,7 +15,7 @@ define float @f1(float %f) { ; Test f64. declare double @llvm.fabs.f64(double %f) define double @f2(double %f) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lpdbr %f0, %f0 ; CHECK: br %r14 %res = call double @llvm.fabs.f64(double %f) @@ -27,7 +27,7 @@ define double @f2(double %f) { ; processing so that using FPRs is unequivocally better. declare fp128 @llvm.fabs.f128(fp128 %f) define void @f3(fp128 *%ptr, fp128 *%ptr2) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lpxbr ; CHECK: dxbr ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-abs-02.ll b/test/CodeGen/SystemZ/fp-abs-02.ll index 513d49c7acf57..909c48a06377e 100644 --- a/test/CodeGen/SystemZ/fp-abs-02.ll +++ b/test/CodeGen/SystemZ/fp-abs-02.ll @@ -5,7 +5,7 @@ ; Test f32. 
declare float @llvm.fabs.f32(float %f) define float @f1(float %f) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lnebr %f0, %f0 ; CHECK: br %r14 %abs = call float @llvm.fabs.f32(float %f) @@ -16,7 +16,7 @@ define float @f1(float %f) { ; Test f64. declare double @llvm.fabs.f64(double %f) define double @f2(double %f) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lndbr %f0, %f0 ; CHECK: br %r14 %abs = call double @llvm.fabs.f64(double %f) @@ -29,7 +29,7 @@ define double @f2(double %f) { ; extra processing so that using FPRs is unequivocally better. declare fp128 @llvm.fabs.f128(fp128 %f) define void @f3(fp128 *%ptr, fp128 *%ptr2) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lnxbr ; CHECK: dxbr ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-add-01.ll b/test/CodeGen/SystemZ/fp-add-01.ll index 7ce0777b88700..28a212801a63e 100644 --- a/test/CodeGen/SystemZ/fp-add-01.ll +++ b/test/CodeGen/SystemZ/fp-add-01.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare float @foo() + ; Check register addition. define float @f1(float %f1, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: aebr %f0, %f2 ; CHECK: br %r14 %res = fadd float %f1, %f2 @@ -13,7 +15,7 @@ define float @f1(float %f1, float %f2) { ; Check the low end of the AEB range. define float @f2(float %f1, float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: aeb %f0, 0(%r2) ; CHECK: br %r14 %f2 = load float *%ptr @@ -23,7 +25,7 @@ define float @f2(float %f1, float *%ptr) { ; Check the high end of the aligned AEB range. define float @f3(float %f1, float *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: aeb %f0, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1023 @@ -35,7 +37,7 @@ define float @f3(float %f1, float *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. 
define float @f4(float %f1, float *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: aeb %f0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +49,7 @@ define float @f4(float %f1, float *%base) { ; Check negative displacements, which also need separate address logic. define float @f5(float %f1, float *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: aeb %f0, 0(%r2) ; CHECK: br %r14 @@ -59,7 +61,7 @@ define float @f5(float %f1, float *%base) { ; Check that AEB allows indices. define float @f6(float %f1, float *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: aeb %f0, 400(%r1,%r2) ; CHECK: br %r14 @@ -69,3 +71,49 @@ define float @f6(float %f1, float *%base, i64 %index) { %res = fadd float %f1, %f2 ret float %res } + +; Check that additions of spilled values can use AEB rather than AEBR. +define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: aeb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float *%ptr0, i64 2 + %ptr2 = getelementptr float *%ptr0, i64 4 + %ptr3 = getelementptr float *%ptr0, i64 6 + %ptr4 = getelementptr float *%ptr0, i64 8 + %ptr5 = getelementptr float *%ptr0, i64 10 + %ptr6 = getelementptr float *%ptr0, i64 12 + %ptr7 = getelementptr float *%ptr0, i64 14 + %ptr8 = getelementptr float *%ptr0, i64 16 + %ptr9 = getelementptr float *%ptr0, i64 18 + %ptr10 = getelementptr float *%ptr0, i64 20 + + %val0 = load float *%ptr0 + %val1 = load float *%ptr1 + %val2 = load float *%ptr2 + %val3 = load float *%ptr3 + %val4 = load float *%ptr4 + %val5 = load float *%ptr5 + %val6 = load float *%ptr6 + %val7 = load float *%ptr7 + %val8 = load float *%ptr8 + %val9 = load float *%ptr9 + %val10 = load float *%ptr10 + + %ret = call float @foo() + + %add0 = fadd float %ret, %val0 + %add1 = fadd float %add0, %val1 + %add2 = fadd float %add1, %val2 + %add3 = fadd float %add2, %val3 + %add4 = fadd float %add3, %val4 + %add5 = fadd float %add4, 
%val5 + %add6 = fadd float %add5, %val6 + %add7 = fadd float %add6, %val7 + %add8 = fadd float %add7, %val8 + %add9 = fadd float %add8, %val9 + %add10 = fadd float %add9, %val10 + + ret float %add10 +} diff --git a/test/CodeGen/SystemZ/fp-add-02.ll b/test/CodeGen/SystemZ/fp-add-02.ll index 08eb90efbfaa1..067c7474fb43a 100644 --- a/test/CodeGen/SystemZ/fp-add-02.ll +++ b/test/CodeGen/SystemZ/fp-add-02.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare double @foo() + ; Check register addition. define double @f1(double %f1, double %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: adbr %f0, %f2 ; CHECK: br %r14 %res = fadd double %f1, %f2 @@ -13,7 +15,7 @@ define double @f1(double %f1, double %f2) { ; Check the low end of the ADB range. define double @f2(double %f1, double *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: adb %f0, 0(%r2) ; CHECK: br %r14 %f2 = load double *%ptr @@ -23,7 +25,7 @@ define double @f2(double %f1, double *%ptr) { ; Check the high end of the aligned ADB range. define double @f3(double %f1, double *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: adb %f0, 4088(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 511 @@ -35,7 +37,7 @@ define double @f3(double %f1, double *%base) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f4(double %f1, double *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: adb %f0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +49,7 @@ define double @f4(double %f1, double *%base) { ; Check negative displacements, which also need separate address logic. define double @f5(double %f1, double *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -8 ; CHECK: adb %f0, 0(%r2) ; CHECK: br %r14 @@ -59,7 +61,7 @@ define double @f5(double %f1, double *%base) { ; Check that ADB allows indices. 
define double @f6(double %f1, double *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 3 ; CHECK: adb %f0, 800(%r1,%r2) ; CHECK: br %r14 @@ -69,3 +71,49 @@ define double @f6(double %f1, double *%base, i64 %index) { %res = fadd double %f1, %f2 ret double %res } + +; Check that additions of spilled values can use ADB rather than ADBR. +define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: adb %f0, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 + %ptr3 = getelementptr double *%ptr0, i64 6 + %ptr4 = getelementptr double *%ptr0, i64 8 + %ptr5 = getelementptr double *%ptr0, i64 10 + %ptr6 = getelementptr double *%ptr0, i64 12 + %ptr7 = getelementptr double *%ptr0, i64 14 + %ptr8 = getelementptr double *%ptr0, i64 16 + %ptr9 = getelementptr double *%ptr0, i64 18 + %ptr10 = getelementptr double *%ptr0, i64 20 + + %val0 = load double *%ptr0 + %val1 = load double *%ptr1 + %val2 = load double *%ptr2 + %val3 = load double *%ptr3 + %val4 = load double *%ptr4 + %val5 = load double *%ptr5 + %val6 = load double *%ptr6 + %val7 = load double *%ptr7 + %val8 = load double *%ptr8 + %val9 = load double *%ptr9 + %val10 = load double *%ptr10 + + %ret = call double @foo() + + %add0 = fadd double %ret, %val0 + %add1 = fadd double %add0, %val1 + %add2 = fadd double %add1, %val2 + %add3 = fadd double %add2, %val3 + %add4 = fadd double %add3, %val4 + %add5 = fadd double %add4, %val5 + %add6 = fadd double %add5, %val6 + %add7 = fadd double %add6, %val7 + %add8 = fadd double %add7, %val8 + %add9 = fadd double %add8, %val9 + %add10 = fadd double %add9, %val10 + + ret double %add10 +} diff --git a/test/CodeGen/SystemZ/fp-add-03.ll b/test/CodeGen/SystemZ/fp-add-03.ll index 13ffb023b6fba..cb4042eee472b 100644 --- a/test/CodeGen/SystemZ/fp-add-03.ll +++ b/test/CodeGen/SystemZ/fp-add-03.ll @@ -4,7 +4,7 @@ ; There is no memory form of 128-bit addition. 
define void @f1(fp128 *%ptr, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lxebr %f0, %f0 ; CHECK: ld %f1, 0(%r2) ; CHECK: ld %f3, 8(%r2) diff --git a/test/CodeGen/SystemZ/fp-cmp-01.ll b/test/CodeGen/SystemZ/fp-cmp-01.ll index b80a71595e87d..d7c0cce9c2a5a 100644 --- a/test/CodeGen/SystemZ/fp-cmp-01.ll +++ b/test/CodeGen/SystemZ/fp-cmp-01.ll @@ -1,12 +1,15 @@ -; Test 32-bit floating-point comparison. +; Test 32-bit floating-point comparison. The tests assume a z10 implementation +; of select, using conditional branches rather than LOCGR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare float @foo() ; Check comparison with registers. define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cebr %f0, %f2 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %cond = fcmp oeq float %f1, %f2 @@ -16,9 +19,9 @@ define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) { ; Check the low end of the CEB range. define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %f2 = load float *%ptr @@ -29,9 +32,9 @@ define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) { ; Check the high end of the aligned CEB range. define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ceb %f0, 4092(%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1023 @@ -44,10 +47,10 @@ define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r4, 4096 ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1024 @@ -59,10 +62,10 @@ define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) { ; Check negative displacements, which also need separate address logic. define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r4, -4 ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 -1 @@ -74,10 +77,10 @@ define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) { ; Check that CEB allows indices. define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r5, 2 ; CHECK: ceb %f0, 400(%r1,%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %ptr1 = getelementptr float *%base, i64 %index @@ -87,3 +90,230 @@ define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) { %res = select i1 %cond, i64 %a, i64 %b ret i64 %res } + +; Check that comparisons of spilled values can use CEB rather than CEBR. 
+define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float *%ptr0, i64 2 + %ptr2 = getelementptr float *%ptr0, i64 4 + %ptr3 = getelementptr float *%ptr0, i64 6 + %ptr4 = getelementptr float *%ptr0, i64 8 + %ptr5 = getelementptr float *%ptr0, i64 10 + %ptr6 = getelementptr float *%ptr0, i64 12 + %ptr7 = getelementptr float *%ptr0, i64 14 + %ptr8 = getelementptr float *%ptr0, i64 16 + %ptr9 = getelementptr float *%ptr0, i64 18 + %ptr10 = getelementptr float *%ptr0, i64 20 + + %val0 = load float *%ptr0 + %val1 = load float *%ptr1 + %val2 = load float *%ptr2 + %val3 = load float *%ptr3 + %val4 = load float *%ptr4 + %val5 = load float *%ptr5 + %val6 = load float *%ptr6 + %val7 = load float *%ptr7 + %val8 = load float *%ptr8 + %val9 = load float *%ptr9 + %val10 = load float *%ptr10 + + %ret = call float @foo() + + %cmp0 = fcmp olt float %ret, %val0 + %cmp1 = fcmp olt float %ret, %val1 + %cmp2 = fcmp olt float %ret, %val2 + %cmp3 = fcmp olt float %ret, %val3 + %cmp4 = fcmp olt float %ret, %val4 + %cmp5 = fcmp olt float %ret, %val5 + %cmp6 = fcmp olt float %ret, %val6 + %cmp7 = fcmp olt float %ret, %val7 + %cmp8 = fcmp olt float %ret, %val8 + %cmp9 = fcmp olt float %ret, %val9 + %cmp10 = fcmp olt float %ret, %val10 + + %sel0 = select i1 %cmp0, float %ret, float 0.0 + %sel1 = select i1 %cmp1, float %sel0, float 1.0 + %sel2 = select i1 %cmp2, float %sel1, float 2.0 + %sel3 = select i1 %cmp3, float %sel2, float 3.0 + %sel4 = select i1 %cmp4, float %sel3, float 4.0 + %sel5 = select i1 %cmp5, float %sel4, float 5.0 + %sel6 = select i1 %cmp6, float %sel5, float 6.0 + %sel7 = select i1 %cmp7, float %sel6, float 7.0 + %sel8 = select i1 %cmp8, float %sel7, float 8.0 + %sel9 = select i1 %cmp9, float %sel8, float 9.0 + %sel10 = select i1 %cmp10, float %sel9, float 10.0 + + ret float %sel10 +} + +; Check comparison with zero. 
+define i64 @f8(i64 %a, i64 %b, float %f) { +; CHECK-LABEL: f8: +; CHECK: ltebr %f0, %f0 +; CHECK-NEXT: je +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %cond = fcmp oeq float %f, 0.0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the comparison can be reversed if that allows CEB to be used, +; first with oeq. +define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f9: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: je {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then one. +define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f10: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jlh {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp one float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then olt. +define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f11: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp olt float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ole. +define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f12: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jhe {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ole float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then oge. +define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f13: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jle {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp oge float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ogt. 
+define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f14: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jl {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ogt float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ueq. +define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f15: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnlh {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ueq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then une. +define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f16: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jne {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp une float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ult. +define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f17: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnle {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ult float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ule. +define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f18: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnl {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ule float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then uge. +define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f19: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnh {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp uge float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ugt. 
+define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f20: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnhe {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ugt float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/fp-cmp-02.ll b/test/CodeGen/SystemZ/fp-cmp-02.ll index 8227308ce8106..c61f04ed244e3 100644 --- a/test/CodeGen/SystemZ/fp-cmp-02.ll +++ b/test/CodeGen/SystemZ/fp-cmp-02.ll @@ -1,12 +1,15 @@ -; Test 64-bit floating-point comparison. +; Test 64-bit floating-point comparison. The tests assume a z10 implementation +; of select, using conditional branches rather than LOCGR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare double @foo() ; Check comparison with registers. define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cdbr %f0, %f2 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %cond = fcmp oeq double %f1, %f2 @@ -16,9 +19,9 @@ define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) { ; Check the low end of the CDB range. define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cdb %f0, 0(%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %f2 = load double *%ptr @@ -29,9 +32,9 @@ define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) { ; Check the high end of the aligned CDB range. define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cdb %f0, 4088(%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 511 @@ -44,10 +47,10 @@ define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) { ; Check the next doubleword up, which needs separate address logic. 
; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r4, 4096 ; CHECK: cdb %f0, 0(%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 512 @@ -59,10 +62,10 @@ define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) { ; Check negative displacements, which also need separate address logic. define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r4, -8 ; CHECK: cdb %f0, 0(%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 -1 @@ -74,10 +77,10 @@ define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) { ; Check that CDB allows indices. define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r5, 3 ; CHECK: cdb %f0, 800(%r1,%r4) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %ptr1 = getelementptr double *%base, i64 %index @@ -87,3 +90,86 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) { %res = select i1 %cond, i64 %a, i64 %b ret i64 %res } + +; Check that comparisons of spilled values can use CDB rather than CDBR. 
+define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: cdb {{%f[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 + %ptr3 = getelementptr double *%ptr0, i64 6 + %ptr4 = getelementptr double *%ptr0, i64 8 + %ptr5 = getelementptr double *%ptr0, i64 10 + %ptr6 = getelementptr double *%ptr0, i64 12 + %ptr7 = getelementptr double *%ptr0, i64 14 + %ptr8 = getelementptr double *%ptr0, i64 16 + %ptr9 = getelementptr double *%ptr0, i64 18 + %ptr10 = getelementptr double *%ptr0, i64 20 + + %val0 = load double *%ptr0 + %val1 = load double *%ptr1 + %val2 = load double *%ptr2 + %val3 = load double *%ptr3 + %val4 = load double *%ptr4 + %val5 = load double *%ptr5 + %val6 = load double *%ptr6 + %val7 = load double *%ptr7 + %val8 = load double *%ptr8 + %val9 = load double *%ptr9 + %val10 = load double *%ptr10 + + %ret = call double @foo() + + %cmp0 = fcmp olt double %ret, %val0 + %cmp1 = fcmp olt double %ret, %val1 + %cmp2 = fcmp olt double %ret, %val2 + %cmp3 = fcmp olt double %ret, %val3 + %cmp4 = fcmp olt double %ret, %val4 + %cmp5 = fcmp olt double %ret, %val5 + %cmp6 = fcmp olt double %ret, %val6 + %cmp7 = fcmp olt double %ret, %val7 + %cmp8 = fcmp olt double %ret, %val8 + %cmp9 = fcmp olt double %ret, %val9 + %cmp10 = fcmp olt double %ret, %val10 + + %sel0 = select i1 %cmp0, double %ret, double 0.0 + %sel1 = select i1 %cmp1, double %sel0, double 1.0 + %sel2 = select i1 %cmp2, double %sel1, double 2.0 + %sel3 = select i1 %cmp3, double %sel2, double 3.0 + %sel4 = select i1 %cmp4, double %sel3, double 4.0 + %sel5 = select i1 %cmp5, double %sel4, double 5.0 + %sel6 = select i1 %cmp6, double %sel5, double 6.0 + %sel7 = select i1 %cmp7, double %sel6, double 7.0 + %sel8 = select i1 %cmp8, double %sel7, double 8.0 + %sel9 = select i1 %cmp9, double %sel8, double 9.0 + %sel10 = select i1 %cmp10, double %sel9, double 10.0 + + ret double %sel10 +} + +; Check comparison 
with zero. +define i64 @f8(i64 %a, i64 %b, double %f) { +; CHECK-LABEL: f8: +; CHECK: ltdbr %f0, %f0 +; CHECK-NEXT: je +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %cond = fcmp oeq double %f, 0.0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the comparison can be reversed if that allows CDB to be used, +define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) { +; CHECK-LABEL: f9: +; CHECK: cdb %f0, 0(%r4) +; CHECK-NEXT: jl {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load double *%ptr + %cond = fcmp ogt double %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/fp-cmp-03.ll b/test/CodeGen/SystemZ/fp-cmp-03.ll index fd12c93e27a51..e777d00c96873 100644 --- a/test/CodeGen/SystemZ/fp-cmp-03.ll +++ b/test/CodeGen/SystemZ/fp-cmp-03.ll @@ -1,15 +1,16 @@ -; Test 128-bit floating-point comparison. +; Test 128-bit floating-point comparison. The tests assume a z10 implementation +; of select, using conditional branches rather than LOCGR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; There is no memory form of 128-bit comparison. define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lxebr %f0, %f0 ; CHECK: ld %f1, 0(%r4) ; CHECK: ld %f3, 8(%r4) ; CHECK: cxbr %f1, %f0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 %f2x = fpext float %f2 to fp128 @@ -18,3 +19,18 @@ define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) { %res = select i1 %cond, i64 %a, i64 %b ret i64 %res } + +; Check comparison with zero. 
+define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: ld %f0, 0(%r4) +; CHECK: ld %f2, 8(%r4) +; CHECK: ltxbr %f0, %f0 +; CHECK-NEXT: je +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f = load fp128 *%ptr + %cond = fcmp oeq fp128 %f, 0xL00000000000000000000000000000000 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/fp-cmp-04.ll b/test/CodeGen/SystemZ/fp-cmp-04.ll new file mode 100644 index 0000000000000..8d842164fa4fa --- /dev/null +++ b/test/CodeGen/SystemZ/fp-cmp-04.ll @@ -0,0 +1,348 @@ +; Test that floating-point compares are ommitted if CC already has the +; right value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare float @llvm.fabs.f32(float %f) + +; Test addition followed by EQ, which can use the CC result of the addition. +define float @f1(float %a, float %b, float *%dest) { +; CHECK-LABEL: f1: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = fadd float %a, %b + %cmp = fcmp oeq float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; ...and again with LT. +define float @f2(float %a, float %b, float *%dest) { +; CHECK-LABEL: f2: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = fadd float %a, %b + %cmp = fcmp olt float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; ...and again with GT. +define float @f3(float %a, float %b, float *%dest) { +; CHECK-LABEL: f3: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %res = fadd float %a, %b + %cmp = fcmp ogt float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; ...and again with UEQ. 
+define float @f4(float %a, float %b, float *%dest) { +; CHECK-LABEL: f4: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: jnlh .L{{.*}} +; CHECK: br %r14 +entry: + %res = fadd float %a, %b + %cmp = fcmp ueq float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Subtraction also provides a zero-based CC value. +define float @f5(float %a, float %b, float *%dest) { +; CHECK-LABEL: f5: +; CHECK: seb %f0, 0(%r2) +; CHECK-NEXT: jnhe .L{{.*}} +; CHECK: br %r14 +entry: + %cur = load float *%dest + %res = fsub float %a, %cur + %cmp = fcmp ult float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test the result of LOAD POSITIVE. +define float @f6(float %dummy, float %a, float *%dest) { +; CHECK-LABEL: f6: +; CHECK: lpebr %f0, %f2 +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %res = call float @llvm.fabs.f32(float %a) + %cmp = fcmp ogt float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test the result of LOAD NEGATIVE. +define float @f7(float %dummy, float %a, float *%dest) { +; CHECK-LABEL: f7: +; CHECK: lnebr %f0, %f2 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %abs = call float @llvm.fabs.f32(float %a) + %res = fsub float -0.0, %abs + %cmp = fcmp olt float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test the result of LOAD COMPLEMENT. 
+define float @f8(float %dummy, float %a, float *%dest) { +; CHECK-LABEL: f8: +; CHECK: lcebr %f0, %f2 +; CHECK-NEXT: jle .L{{.*}} +; CHECK: br %r14 +entry: + %res = fsub float -0.0, %a + %cmp = fcmp ole float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Multiplication (for example) does not modify CC. +define float @f9(float %a, float %b, float *%dest) { +; CHECK-LABEL: f9: +; CHECK: meebr %f0, %f2 +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: jlh .L{{.*}} +; CHECK: br %r14 +entry: + %res = fmul float %a, %b + %cmp = fcmp one float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test a combination involving a CC-setting instruction followed by +; a non-CC-setting instruction. +define float @f10(float %a, float %b, float %c, float *%dest) { +; CHECK-LABEL: f10: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: debr %f0, %f4 +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %add = fadd float %a, %b + %res = fdiv float %add, %c + %cmp = fcmp une float %res, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test a case where CC is set based on a different register from the +; compare input. +define float @f11(float %a, float %b, float %c, float *%dest1, float *%dest2) { +; CHECK-LABEL: f11: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: sebr %f4, %f0 +; CHECK-NEXT: ste %f4, 0(%r2) +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %add = fadd float %a, %b + %sub = fsub float %c, %add + store float %sub, float *%dest1 + %cmp = fcmp oeq float %add, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %sub, float *%dest2 + br label %exit + +exit: + ret float %add +} + +; Test that LER gets converted to LTEBR where useful. 
+define float @f12(float %dummy, float %val, float *%dest) { +; CHECK-LABEL: f12: +; CHECK: ltebr %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{f0}"(float %val) + %cmp = fcmp olt float %val, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %val, float *%dest + br label %exit + +exit: + ret float %val +} + +; Test that LDR gets converted to LTDBR where useful. +define double @f13(double %dummy, double %val, double *%dest) { +; CHECK-LABEL: f13: +; CHECK: ltdbr %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{f0}"(double %val) + %cmp = fcmp olt double %val, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store double %val, double *%dest + br label %exit + +exit: + ret double %val +} + +; Test that LXR gets converted to LTXBR where useful. +define void @f14(fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f14: +; CHECK: ltxbr +; CHECK-NEXT: dxbr +; CHECK-NEXT: std +; CHECK-NEXT: std +; CHECK-NEXT: mxbr +; CHECK-NEXT: std +; CHECK-NEXT: std +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %val1 = load fp128 *%ptr1 + %val2 = load fp128 *%ptr2 + %div = fdiv fp128 %val1, %val2 + store fp128 %div, fp128 *%ptr1 + %mul = fmul fp128 %val1, %val2 + store fp128 %mul, fp128 *%ptr2 + %cmp = fcmp olt fp128 %val1, 0xL00000000000000000000000000000000 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() + br label %exit + +exit: + ret void +} + +; Test a case where it is the source rather than destination of LER that +; we need. 
+define float @f15(float %val, float %dummy, float *%dest) { +; CHECK-LABEL: f15: +; CHECK: ltebr %f2, %f0 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{f2}"(float %val) + %cmp = fcmp olt float %val, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %val, float *%dest + br label %exit + +exit: + ret float %val +} + +; Test a case where it is the source rather than destination of LDR that +; we need. +define double @f16(double %val, double %dummy, double *%dest) { +; CHECK-LABEL: f16: +; CHECK: ltdbr %f2, %f0 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{f2}"(double %val) + %cmp = fcmp olt double %val, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store double %val, double *%dest + br label %exit + +exit: + ret double %val +} diff --git a/test/CodeGen/SystemZ/fp-const-01.ll b/test/CodeGen/SystemZ/fp-const-01.ll index 65209d661e977..3a4ddf087803e 100644 --- a/test/CodeGen/SystemZ/fp-const-01.ll +++ b/test/CodeGen/SystemZ/fp-const-01.ll @@ -4,7 +4,7 @@ ; Test f32. define float @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lzer %f0 ; CHECK: br %r14 ret float 0.0 @@ -12,7 +12,7 @@ define float @f1() { ; Test f64. define double @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lzdr %f0 ; CHECK: br %r14 ret double 0.0 @@ -20,7 +20,7 @@ define double @f2() { ; Test f128. define void @f3(fp128 *%x) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lzxr %f0 ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) diff --git a/test/CodeGen/SystemZ/fp-const-02.ll b/test/CodeGen/SystemZ/fp-const-02.ll index 2dedf54e6f7df..96f857895ecfa 100644 --- a/test/CodeGen/SystemZ/fp-const-02.ll +++ b/test/CodeGen/SystemZ/fp-const-02.ll @@ -4,7 +4,7 @@ ; Test f32. 
define float @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lzer [[REGISTER:%f[0-5]+]] ; CHECK: lcebr %f0, [[REGISTER]] ; CHECK: br %r14 @@ -13,7 +13,7 @@ define float @f1() { ; Test f64. define double @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lzdr [[REGISTER:%f[0-5]+]] ; CHECK: lcdbr %f0, [[REGISTER]] ; CHECK: br %r14 @@ -22,7 +22,7 @@ define double @f2() { ; Test f128. define void @f3(fp128 *%x) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lzxr [[REGISTER:%f[0-5]+]] ; CHECK: lcxbr %f0, [[REGISTER]] ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-const-03.ll b/test/CodeGen/SystemZ/fp-const-03.ll index 4c287e4c08a37..b2ae94db0b7e6 100644 --- a/test/CodeGen/SystemZ/fp-const-03.ll +++ b/test/CodeGen/SystemZ/fp-const-03.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST define float @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: larl [[REGISTER:%r[1-5]]], {{.*}} ; CHECK: le %f0, 0([[REGISTER]]) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-const-04.ll b/test/CodeGen/SystemZ/fp-const-04.ll index 847c380e3b982..d5526884dc698 100644 --- a/test/CodeGen/SystemZ/fp-const-04.ll +++ b/test/CodeGen/SystemZ/fp-const-04.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST define double @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: larl [[REGISTER:%r[1-5]]], {{.*}} ; CHECK: ldeb %f0, 0([[REGISTER]]) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-const-05.ll b/test/CodeGen/SystemZ/fp-const-05.ll index 48f84ce5bee8a..d81e3db91f487 100644 --- a/test/CodeGen/SystemZ/fp-const-05.ll +++ b/test/CodeGen/SystemZ/fp-const-05.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST define void @f1(fp128 *%x) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} ; CHECK: lxeb %f0, 0([[REGISTER]]) ; CHECK: std %f0, 0(%r2) diff --git a/test/CodeGen/SystemZ/fp-const-06.ll 
b/test/CodeGen/SystemZ/fp-const-06.ll index 1da3d5eafaae3..088810ba8e408 100644 --- a/test/CodeGen/SystemZ/fp-const-06.ll +++ b/test/CodeGen/SystemZ/fp-const-06.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST define double @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} ; CHECK: ld %f0, 0([[REGISTER]]) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-const-07.ll b/test/CodeGen/SystemZ/fp-const-07.ll index 5a108452a8e09..87e8f68b372f9 100644 --- a/test/CodeGen/SystemZ/fp-const-07.ll +++ b/test/CodeGen/SystemZ/fp-const-07.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST define void @f1(fp128 *%x) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} ; CHECK: lxdb %f0, 0([[REGISTER]]) ; CHECK: std %f0, 0(%r2) diff --git a/test/CodeGen/SystemZ/fp-const-08.ll b/test/CodeGen/SystemZ/fp-const-08.ll index 6a8a1ab3f9b78..8845adbebc562 100644 --- a/test/CodeGen/SystemZ/fp-const-08.ll +++ b/test/CodeGen/SystemZ/fp-const-08.ll @@ -6,7 +6,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST define void @f1(fp128 *%x) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} ; CHECK: ld %f0, 0([[REGISTER]]) ; CHECK: ld %f2, 8([[REGISTER]]) diff --git a/test/CodeGen/SystemZ/fp-const-09.ll b/test/CodeGen/SystemZ/fp-const-09.ll index 435dcbacc19dc..0c7d726e9d09b 100644 --- a/test/CodeGen/SystemZ/fp-const-09.ll +++ b/test/CodeGen/SystemZ/fp-const-09.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST define void @f1(fp128 *%x) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} ; CHECK: ld %f0, 0([[REGISTER]]) ; CHECK: ld %f2, 8([[REGISTER]]) diff --git a/test/CodeGen/SystemZ/fp-conv-01.ll b/test/CodeGen/SystemZ/fp-conv-01.ll index 6c8ef4899776b..49ed43bce51c2 100644 --- 
a/test/CodeGen/SystemZ/fp-conv-01.ll +++ b/test/CodeGen/SystemZ/fp-conv-01.ll @@ -4,7 +4,7 @@ ; Test f64->f32. define float @f1(double %d1, double %d2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ledbr %f0, %f2 ; CHECK: br %r14 %res = fptrunc double %d2 to float @@ -13,7 +13,7 @@ define float @f1(double %d1, double %d2) { ; Test f128->f32. define float @f2(fp128 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lexbr %f0, %f0 ; CHECK: br %r14 %val = load fp128 *%ptr @@ -24,7 +24,7 @@ define float @f2(fp128 *%ptr) { ; Make sure that we don't use %f0 as the destination of LEXBR when %f2 ; is still live. define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lexbr %f1, %f1 ; CHECK: aebr %f1, %f2 ; CHECK: ste %f1, 0(%r2) @@ -38,7 +38,7 @@ define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) { ; Test f128->f64. define double @f4(fp128 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ldxbr %f0, %f0 ; CHECK: br %r14 %val = load fp128 *%ptr @@ -48,7 +48,7 @@ define double @f4(fp128 *%ptr) { ; Like f3, but for f128->f64. define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: ldxbr %f1, %f1 ; CHECK: adbr %f1, %f2 ; CHECK: std %f1, 0(%r2) diff --git a/test/CodeGen/SystemZ/fp-conv-02.ll b/test/CodeGen/SystemZ/fp-conv-02.ll index f284e1dc2ae47..93fb7c8d4d92d 100644 --- a/test/CodeGen/SystemZ/fp-conv-02.ll +++ b/test/CodeGen/SystemZ/fp-conv-02.ll @@ -4,7 +4,7 @@ ; Check register extension. define double @f1(float %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ldebr %f0, %f0 ; CHECK: br %r14 %res = fpext float %val to double @@ -13,7 +13,7 @@ define double @f1(float %val) { ; Check the low end of the LDEB range. define double @f2(float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ldeb %f0, 0(%r2) ; CHECK: br %r14 %val = load float *%ptr @@ -23,7 +23,7 @@ define double @f2(float *%ptr) { ; Check the high end of the aligned LDEB range. 
define double @f3(float *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ldeb %f0, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1023 @@ -35,7 +35,7 @@ define double @f3(float *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f4(float *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: ldeb %f0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +47,7 @@ define double @f4(float *%base) { ; Check negative displacements, which also need separate address logic. define double @f5(float *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: ldeb %f0, 0(%r2) ; CHECK: br %r14 @@ -59,7 +59,7 @@ define double @f5(float *%base) { ; Check that LDEB allows indices. define double @f6(float *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: ldeb %f0, 400(%r1,%r2) ; CHECK: br %r14 @@ -69,3 +69,84 @@ define double @f6(float *%base, i64 %index) { %res = fpext float %val to double ret double %res } + +; Test a case where we spill the source of at least one LDEBR. We want +; to use LDEB if possible. 
+define void @f7(double *%ptr1, float *%ptr2) { +; CHECK-LABEL: f7: +; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile float *%ptr2 + %val1 = load volatile float *%ptr2 + %val2 = load volatile float *%ptr2 + %val3 = load volatile float *%ptr2 + %val4 = load volatile float *%ptr2 + %val5 = load volatile float *%ptr2 + %val6 = load volatile float *%ptr2 + %val7 = load volatile float *%ptr2 + %val8 = load volatile float *%ptr2 + %val9 = load volatile float *%ptr2 + %val10 = load volatile float *%ptr2 + %val11 = load volatile float *%ptr2 + %val12 = load volatile float *%ptr2 + %val13 = load volatile float *%ptr2 + %val14 = load volatile float *%ptr2 + %val15 = load volatile float *%ptr2 + %val16 = load volatile float *%ptr2 + + %ext0 = fpext float %val0 to double + %ext1 = fpext float %val1 to double + %ext2 = fpext float %val2 to double + %ext3 = fpext float %val3 to double + %ext4 = fpext float %val4 to double + %ext5 = fpext float %val5 to double + %ext6 = fpext float %val6 to double + %ext7 = fpext float %val7 to double + %ext8 = fpext float %val8 to double + %ext9 = fpext float %val9 to double + %ext10 = fpext float %val10 to double + %ext11 = fpext float %val11 to double + %ext12 = fpext float %val12 to double + %ext13 = fpext float %val13 to double + %ext14 = fpext float %val14 to double + %ext15 = fpext float %val15 to double + %ext16 = fpext float %val16 to double + + store volatile float %val0, float *%ptr2 + store volatile float %val1, float *%ptr2 + store volatile float %val2, float *%ptr2 + store volatile float %val3, float *%ptr2 + store volatile float %val4, float *%ptr2 + store volatile float %val5, float *%ptr2 + store volatile float %val6, float *%ptr2 + store volatile float %val7, float *%ptr2 + store volatile float %val8, float *%ptr2 + store volatile float %val9, float *%ptr2 + store volatile float %val10, float *%ptr2 + store volatile float %val11, float *%ptr2 + store volatile float %val12, float *%ptr2 + 
store volatile float %val13, float *%ptr2 + store volatile float %val14, float *%ptr2 + store volatile float %val15, float *%ptr2 + store volatile float %val16, float *%ptr2 + + store volatile double %ext0, double *%ptr1 + store volatile double %ext1, double *%ptr1 + store volatile double %ext2, double *%ptr1 + store volatile double %ext3, double *%ptr1 + store volatile double %ext4, double *%ptr1 + store volatile double %ext5, double *%ptr1 + store volatile double %ext6, double *%ptr1 + store volatile double %ext7, double *%ptr1 + store volatile double %ext8, double *%ptr1 + store volatile double %ext9, double *%ptr1 + store volatile double %ext10, double *%ptr1 + store volatile double %ext11, double *%ptr1 + store volatile double %ext12, double *%ptr1 + store volatile double %ext13, double *%ptr1 + store volatile double %ext14, double *%ptr1 + store volatile double %ext15, double *%ptr1 + store volatile double %ext16, double *%ptr1 + + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-03.ll b/test/CodeGen/SystemZ/fp-conv-03.ll index 703a141e3e128..d42ce6650aafb 100644 --- a/test/CodeGen/SystemZ/fp-conv-03.ll +++ b/test/CodeGen/SystemZ/fp-conv-03.ll @@ -4,7 +4,7 @@ ; Check register extension. define void @f1(fp128 *%dst, float %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lxebr %f0, %f0 ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) @@ -16,7 +16,7 @@ define void @f1(fp128 *%dst, float %val) { ; Check the low end of the LXEB range. define void @f2(fp128 *%dst, float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lxeb %f0, 0(%r3) ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) @@ -29,7 +29,7 @@ define void @f2(fp128 *%dst, float *%ptr) { ; Check the high end of the aligned LXEB range. 
define void @f3(fp128 *%dst, float *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lxeb %f0, 4092(%r3) ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) @@ -44,7 +44,7 @@ define void @f3(fp128 *%dst, float *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f4(fp128 *%dst, float *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r3, 4096 ; CHECK: lxeb %f0, 0(%r3) ; CHECK: std %f0, 0(%r2) @@ -59,7 +59,7 @@ define void @f4(fp128 *%dst, float *%base) { ; Check negative displacements, which also need separate address logic. define void @f5(fp128 *%dst, float *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r3, -4 ; CHECK: lxeb %f0, 0(%r3) ; CHECK: std %f0, 0(%r2) @@ -74,7 +74,7 @@ define void @f5(fp128 *%dst, float *%base) { ; Check that LXEB allows indices. define void @f6(fp128 *%dst, float *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r4, 2 ; CHECK: lxeb %f0, 400(%r1,%r3) ; CHECK: std %f0, 0(%r2) @@ -87,3 +87,84 @@ define void @f6(fp128 *%dst, float *%base, i64 %index) { store fp128 %res, fp128 *%dst ret void } + +; Test a case where we spill the source of at least one LXEBR. We want +; to use LXEB if possible. 
+define void @f7(fp128 *%ptr1, float *%ptr2) { +; CHECK-LABEL: f7: +; CHECK: lxeb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile float *%ptr2 + %val1 = load volatile float *%ptr2 + %val2 = load volatile float *%ptr2 + %val3 = load volatile float *%ptr2 + %val4 = load volatile float *%ptr2 + %val5 = load volatile float *%ptr2 + %val6 = load volatile float *%ptr2 + %val7 = load volatile float *%ptr2 + %val8 = load volatile float *%ptr2 + %val9 = load volatile float *%ptr2 + %val10 = load volatile float *%ptr2 + %val11 = load volatile float *%ptr2 + %val12 = load volatile float *%ptr2 + %val13 = load volatile float *%ptr2 + %val14 = load volatile float *%ptr2 + %val15 = load volatile float *%ptr2 + %val16 = load volatile float *%ptr2 + + %ext0 = fpext float %val0 to fp128 + %ext1 = fpext float %val1 to fp128 + %ext2 = fpext float %val2 to fp128 + %ext3 = fpext float %val3 to fp128 + %ext4 = fpext float %val4 to fp128 + %ext5 = fpext float %val5 to fp128 + %ext6 = fpext float %val6 to fp128 + %ext7 = fpext float %val7 to fp128 + %ext8 = fpext float %val8 to fp128 + %ext9 = fpext float %val9 to fp128 + %ext10 = fpext float %val10 to fp128 + %ext11 = fpext float %val11 to fp128 + %ext12 = fpext float %val12 to fp128 + %ext13 = fpext float %val13 to fp128 + %ext14 = fpext float %val14 to fp128 + %ext15 = fpext float %val15 to fp128 + %ext16 = fpext float %val16 to fp128 + + store volatile float %val0, float *%ptr2 + store volatile float %val1, float *%ptr2 + store volatile float %val2, float *%ptr2 + store volatile float %val3, float *%ptr2 + store volatile float %val4, float *%ptr2 + store volatile float %val5, float *%ptr2 + store volatile float %val6, float *%ptr2 + store volatile float %val7, float *%ptr2 + store volatile float %val8, float *%ptr2 + store volatile float %val9, float *%ptr2 + store volatile float %val10, float *%ptr2 + store volatile float %val11, float *%ptr2 + store volatile float %val12, float *%ptr2 + store volatile float 
%val13, float *%ptr2 + store volatile float %val14, float *%ptr2 + store volatile float %val15, float *%ptr2 + store volatile float %val16, float *%ptr2 + + store volatile fp128 %ext0, fp128 *%ptr1 + store volatile fp128 %ext1, fp128 *%ptr1 + store volatile fp128 %ext2, fp128 *%ptr1 + store volatile fp128 %ext3, fp128 *%ptr1 + store volatile fp128 %ext4, fp128 *%ptr1 + store volatile fp128 %ext5, fp128 *%ptr1 + store volatile fp128 %ext6, fp128 *%ptr1 + store volatile fp128 %ext7, fp128 *%ptr1 + store volatile fp128 %ext8, fp128 *%ptr1 + store volatile fp128 %ext9, fp128 *%ptr1 + store volatile fp128 %ext10, fp128 *%ptr1 + store volatile fp128 %ext11, fp128 *%ptr1 + store volatile fp128 %ext12, fp128 *%ptr1 + store volatile fp128 %ext13, fp128 *%ptr1 + store volatile fp128 %ext14, fp128 *%ptr1 + store volatile fp128 %ext15, fp128 *%ptr1 + store volatile fp128 %ext16, fp128 *%ptr1 + + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-04.ll b/test/CodeGen/SystemZ/fp-conv-04.ll index b7b516693430b..518d6c28d8670 100644 --- a/test/CodeGen/SystemZ/fp-conv-04.ll +++ b/test/CodeGen/SystemZ/fp-conv-04.ll @@ -4,7 +4,7 @@ ; Check register extension. define void @f1(fp128 *%dst, double %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lxdbr %f0, %f0 ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) @@ -16,7 +16,7 @@ define void @f1(fp128 *%dst, double %val) { ; Check the low end of the LXDB range. define void @f2(fp128 *%dst, double *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lxdb %f0, 0(%r3) ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) @@ -29,7 +29,7 @@ define void @f2(fp128 *%dst, double *%ptr) { ; Check the high end of the aligned LXDB range. define void @f3(fp128 *%dst, double *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lxdb %f0, 4088(%r3) ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) @@ -44,7 +44,7 @@ define void @f3(fp128 *%dst, double *%base) { ; Check the next doubleword up, which needs separate address logic. 
; Other sequences besides this one would be OK. define void @f4(fp128 *%dst, double *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r3, 4096 ; CHECK: lxdb %f0, 0(%r3) ; CHECK: std %f0, 0(%r2) @@ -59,7 +59,7 @@ define void @f4(fp128 *%dst, double *%base) { ; Check negative displacements, which also need separate address logic. define void @f5(fp128 *%dst, double *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r3, -8 ; CHECK: lxdb %f0, 0(%r3) ; CHECK: std %f0, 0(%r2) @@ -74,7 +74,7 @@ define void @f5(fp128 *%dst, double *%base) { ; Check that LXDB allows indices. define void @f6(fp128 *%dst, double *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r4, 3 ; CHECK: lxdb %f0, 800(%r1,%r3) ; CHECK: std %f0, 0(%r2) @@ -87,3 +87,84 @@ define void @f6(fp128 *%dst, double *%base, i64 %index) { store fp128 %res, fp128 *%dst ret void } + +; Test a case where we spill the source of at least one LXDBR. We want +; to use LXDB if possible. +define void @f7(fp128 *%ptr1, double *%ptr2) { +; CHECK-LABEL: f7: +; CHECK: lxdb {{%f[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %val0 = load volatile double *%ptr2 + %val1 = load volatile double *%ptr2 + %val2 = load volatile double *%ptr2 + %val3 = load volatile double *%ptr2 + %val4 = load volatile double *%ptr2 + %val5 = load volatile double *%ptr2 + %val6 = load volatile double *%ptr2 + %val7 = load volatile double *%ptr2 + %val8 = load volatile double *%ptr2 + %val9 = load volatile double *%ptr2 + %val10 = load volatile double *%ptr2 + %val11 = load volatile double *%ptr2 + %val12 = load volatile double *%ptr2 + %val13 = load volatile double *%ptr2 + %val14 = load volatile double *%ptr2 + %val15 = load volatile double *%ptr2 + %val16 = load volatile double *%ptr2 + + %ext0 = fpext double %val0 to fp128 + %ext1 = fpext double %val1 to fp128 + %ext2 = fpext double %val2 to fp128 + %ext3 = fpext double %val3 to fp128 + %ext4 = fpext double %val4 to fp128 + %ext5 = fpext double %val5 to fp128 + 
%ext6 = fpext double %val6 to fp128 + %ext7 = fpext double %val7 to fp128 + %ext8 = fpext double %val8 to fp128 + %ext9 = fpext double %val9 to fp128 + %ext10 = fpext double %val10 to fp128 + %ext11 = fpext double %val11 to fp128 + %ext12 = fpext double %val12 to fp128 + %ext13 = fpext double %val13 to fp128 + %ext14 = fpext double %val14 to fp128 + %ext15 = fpext double %val15 to fp128 + %ext16 = fpext double %val16 to fp128 + + store volatile double %val0, double *%ptr2 + store volatile double %val1, double *%ptr2 + store volatile double %val2, double *%ptr2 + store volatile double %val3, double *%ptr2 + store volatile double %val4, double *%ptr2 + store volatile double %val5, double *%ptr2 + store volatile double %val6, double *%ptr2 + store volatile double %val7, double *%ptr2 + store volatile double %val8, double *%ptr2 + store volatile double %val9, double *%ptr2 + store volatile double %val10, double *%ptr2 + store volatile double %val11, double *%ptr2 + store volatile double %val12, double *%ptr2 + store volatile double %val13, double *%ptr2 + store volatile double %val14, double *%ptr2 + store volatile double %val15, double *%ptr2 + store volatile double %val16, double *%ptr2 + + store volatile fp128 %ext0, fp128 *%ptr1 + store volatile fp128 %ext1, fp128 *%ptr1 + store volatile fp128 %ext2, fp128 *%ptr1 + store volatile fp128 %ext3, fp128 *%ptr1 + store volatile fp128 %ext4, fp128 *%ptr1 + store volatile fp128 %ext5, fp128 *%ptr1 + store volatile fp128 %ext6, fp128 *%ptr1 + store volatile fp128 %ext7, fp128 *%ptr1 + store volatile fp128 %ext8, fp128 *%ptr1 + store volatile fp128 %ext9, fp128 *%ptr1 + store volatile fp128 %ext10, fp128 *%ptr1 + store volatile fp128 %ext11, fp128 *%ptr1 + store volatile fp128 %ext12, fp128 *%ptr1 + store volatile fp128 %ext13, fp128 *%ptr1 + store volatile fp128 %ext14, fp128 *%ptr1 + store volatile fp128 %ext15, fp128 *%ptr1 + store volatile fp128 %ext16, fp128 *%ptr1 + + ret void +} diff --git 
a/test/CodeGen/SystemZ/fp-conv-05.ll b/test/CodeGen/SystemZ/fp-conv-05.ll index 2d887324c3e6e..deeffbf30c0ed 100644 --- a/test/CodeGen/SystemZ/fp-conv-05.ll +++ b/test/CodeGen/SystemZ/fp-conv-05.ll @@ -4,7 +4,7 @@ ; Check i32->f32. define float @f1(i32 %i) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cefbr %f0, %r2 ; CHECK: br %r14 %conv = sitofp i32 %i to float @@ -13,7 +13,7 @@ define float @f1(i32 %i) { ; Check i32->f64. define double @f2(i32 %i) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cdfbr %f0, %r2 ; CHECK: br %r14 %conv = sitofp i32 %i to double @@ -22,7 +22,7 @@ define double @f2(i32 %i) { ; Check i32->f128. define void @f3(i32 %i, fp128 *%dst) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cxfbr %f0, %r2 ; CHECK: std %f0, 0(%r3) ; CHECK: std %f2, 8(%r3) diff --git a/test/CodeGen/SystemZ/fp-conv-06.ll b/test/CodeGen/SystemZ/fp-conv-06.ll index 1b39b67d49b8c..466c1456a0cb6 100644 --- a/test/CodeGen/SystemZ/fp-conv-06.ll +++ b/test/CodeGen/SystemZ/fp-conv-06.ll @@ -5,7 +5,7 @@ ; Check i32->f32. There is no native instruction, so we must promote ; to i64 first. define float @f1(i32 %i) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2 ; CHECK: cegbr %f0, [[REGISTER]] ; CHECK: br %r14 @@ -15,7 +15,7 @@ define float @f1(i32 %i) { ; Check i32->f64. define double @f2(i32 %i) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2 ; CHECK: cdgbr %f0, [[REGISTER]] ; CHECK: br %r14 @@ -25,7 +25,7 @@ define double @f2(i32 %i) { ; Check i32->f128. define void @f3(i32 %i, fp128 *%dst) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2 ; CHECK: cxgbr %f0, [[REGISTER]] ; CHECK: std %f0, 0(%r3) diff --git a/test/CodeGen/SystemZ/fp-conv-07.ll b/test/CodeGen/SystemZ/fp-conv-07.ll index 0ebbd37d512dc..aba5c4c0195d0 100644 --- a/test/CodeGen/SystemZ/fp-conv-07.ll +++ b/test/CodeGen/SystemZ/fp-conv-07.ll @@ -4,7 +4,7 @@ ; Test i64->f32. 
define float @f1(i64 %i) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cegbr %f0, %r2 ; CHECK: br %r14 %conv = sitofp i64 %i to float @@ -13,7 +13,7 @@ define float @f1(i64 %i) { ; Test i64->f64. define double @f2(i64 %i) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cdgbr %f0, %r2 ; CHECK: br %r14 %conv = sitofp i64 %i to double @@ -22,7 +22,7 @@ define double @f2(i64 %i) { ; Test i64->f128. define void @f3(i64 %i, fp128 *%dst) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cxgbr %f0, %r2 ; CHECK: std %f0, 0(%r3) ; CHECK: std %f2, 8(%r3) diff --git a/test/CodeGen/SystemZ/fp-conv-08.ll b/test/CodeGen/SystemZ/fp-conv-08.ll index 20c4e30f07960..69b2d13e29f0e 100644 --- a/test/CodeGen/SystemZ/fp-conv-08.ll +++ b/test/CodeGen/SystemZ/fp-conv-08.ll @@ -5,7 +5,7 @@ ; Test i64->f32. There's no native support for unsigned i64-to-fp conversions, ; but we should be able to implement them using signed i64-to-fp conversions. define float @f1(i64 %i) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cegbr ; CHECK: aebr ; CHECK: br %r14 @@ -15,9 +15,9 @@ define float @f1(i64 %i) { ; Test i64->f64. define double @f2(i64 %i) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ldgr -; CHECL: adbr +; CHECK: adbr ; CHECK: br %r14 %conv = uitofp i64 %i to double ret double %conv @@ -25,7 +25,7 @@ define double @f2(i64 %i) { ; Test i64->f128. define void @f3(i64 %i, fp128 *%dst) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cxgbr ; CHECK: axbr ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-conv-09.ll b/test/CodeGen/SystemZ/fp-conv-09.ll index e3c0352cf84ee..6aee73644a16b 100644 --- a/test/CodeGen/SystemZ/fp-conv-09.ll +++ b/test/CodeGen/SystemZ/fp-conv-09.ll @@ -4,7 +4,7 @@ ; Test f32->i32. define i32 @f1(float %f) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cfebr %r2, 5, %f0 ; CHECK: br %r14 %conv = fptosi float %f to i32 @@ -13,7 +13,7 @@ define i32 @f1(float %f) { ; Test f64->i32. 
define i32 @f2(double %f) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cfdbr %r2, 5, %f0 ; CHECK: br %r14 %conv = fptosi double %f to i32 @@ -22,7 +22,7 @@ define i32 @f2(double %f) { ; Test f128->i32. define i32 @f3(fp128 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ld %f0, 0(%r2) ; CHECK: ld %f2, 8(%r2) ; CHECK: cfxbr %r2, 5, %f0 diff --git a/test/CodeGen/SystemZ/fp-conv-10.ll b/test/CodeGen/SystemZ/fp-conv-10.ll index bb8878bacee83..723d19d2a1de8 100644 --- a/test/CodeGen/SystemZ/fp-conv-10.ll +++ b/test/CodeGen/SystemZ/fp-conv-10.ll @@ -9,7 +9,7 @@ ; Test f32->i32. define i32 @f1(float %f) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cebr ; CHECK: sebr ; CHECK: cfebr @@ -21,7 +21,7 @@ define i32 @f1(float %f) { ; Test f64->i32. define i32 @f2(double %f) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cdbr ; CHECK: sdbr ; CHECK: cfdbr @@ -33,7 +33,7 @@ define i32 @f2(double %f) { ; Test f128->i32. define i32 @f3(fp128 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cxbr ; CHECK: sxbr ; CHECK: cfxbr diff --git a/test/CodeGen/SystemZ/fp-conv-11.ll b/test/CodeGen/SystemZ/fp-conv-11.ll index 2a36cb955cb52..46f4cb3a6d893 100644 --- a/test/CodeGen/SystemZ/fp-conv-11.ll +++ b/test/CodeGen/SystemZ/fp-conv-11.ll @@ -4,7 +4,7 @@ ; Test f32->i64. define i64 @f1(float %f) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cgebr %r2, 5, %f0 ; CHECK: br %r14 %conv = fptosi float %f to i64 @@ -13,7 +13,7 @@ define i64 @f1(float %f) { ; Test f64->i64. define i64 @f2(double %f) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cgdbr %r2, 5, %f0 ; CHECK: br %r14 %conv = fptosi double %f to i64 @@ -22,7 +22,7 @@ define i64 @f2(double %f) { ; Test f128->i64. 
define i64 @f3(fp128 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ld %f0, 0(%r2) ; CHECK: ld %f2, 8(%r2) ; CHECK: cgxbr %r2, 5, %f0 diff --git a/test/CodeGen/SystemZ/fp-conv-12.ll b/test/CodeGen/SystemZ/fp-conv-12.ll index 4445b14ee8ef1..6cc343abdafc2 100644 --- a/test/CodeGen/SystemZ/fp-conv-12.ll +++ b/test/CodeGen/SystemZ/fp-conv-12.ll @@ -8,7 +8,7 @@ ; Test f32->i64. define i64 @f1(float %f) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cebr ; CHECK: sebr ; CHECK: cgebr @@ -20,7 +20,7 @@ define i64 @f1(float %f) { ; Test f64->i64. define i64 @f2(double %f) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cdbr ; CHECK: sdbr ; CHECK: cgdbr @@ -32,7 +32,7 @@ define i64 @f2(double %f) { ; Test f128->i64. define i64 @f3(fp128 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cxbr ; CHECK: sxbr ; CHECK: cgxbr diff --git a/test/CodeGen/SystemZ/fp-copysign-01.ll b/test/CodeGen/SystemZ/fp-copysign-01.ll index 458d475bdf3c5..50177e5f41bf4 100644 --- a/test/CodeGen/SystemZ/fp-copysign-01.ll +++ b/test/CodeGen/SystemZ/fp-copysign-01.ll @@ -9,7 +9,7 @@ declare fp128 @copysignl(fp128, fp128) readnone ; Test f32 copies in which the sign comes from an f32. define float @f1(float %a, float %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: %f2 ; CHECK: cpsdr %f0, %f0, %f2 ; CHECK: br %r14 @@ -19,7 +19,7 @@ define float @f1(float %a, float %b) { ; Test f32 copies in which the sign comes from an f64. define float @f2(float %a, double %bd) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: %f2 ; CHECK: cpsdr %f0, %f0, %f2 ; CHECK: br %r14 @@ -30,7 +30,7 @@ define float @f2(float %a, double %bd) { ; Test f32 copies in which the sign comes from an f128. define float @f3(float %a, fp128 *%bptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r2) ; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2) ; CHECK: cpsdr %f0, %f0, [[BHIGH]] @@ -43,7 +43,7 @@ define float @f3(float %a, fp128 *%bptr) { ; Test f64 copies in which the sign comes from an f32. 
define double @f4(double %a, float %bf) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: %f2 ; CHECK: cpsdr %f0, %f0, %f2 ; CHECK: br %r14 @@ -54,7 +54,7 @@ define double @f4(double %a, float %bf) { ; Test f64 copies in which the sign comes from an f64. define double @f5(double %a, double %b) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: %f2 ; CHECK: cpsdr %f0, %f0, %f2 ; CHECK: br %r14 @@ -64,7 +64,7 @@ define double @f5(double %a, double %b) { ; Test f64 copies in which the sign comes from an f128. define double @f6(double %a, fp128 *%bptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r2) ; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2) ; CHECK: cpsdr %f0, %f0, [[BHIGH]] @@ -79,7 +79,7 @@ define double @f6(double %a, fp128 *%bptr) { ; need any register shuffling here; %a should be tied to %c, with CPSDR ; just changing the high register. define void @f7(fp128 *%cptr, fp128 *%aptr, float %bf) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3) ; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3) ; CHECK: cpsdr [[AHIGH]], [[AHIGH]], %f0 @@ -95,7 +95,7 @@ define void @f7(fp128 *%cptr, fp128 *%aptr, float %bf) { ; As above, but the sign comes from an f64. define void @f8(fp128 *%cptr, fp128 *%aptr, double %bd) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3) ; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3) ; CHECK: cpsdr [[AHIGH]], [[AHIGH]], %f0 @@ -112,7 +112,7 @@ define void @f8(fp128 *%cptr, fp128 *%aptr, double %bd) { ; As above, but the sign comes from an f128. Don't require the low part ; of %b to be loaded, since it isn't used. 
define void @f9(fp128 *%cptr, fp128 *%aptr, fp128 *%bptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3) ; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3) ; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r4) diff --git a/test/CodeGen/SystemZ/fp-div-01.ll b/test/CodeGen/SystemZ/fp-div-01.ll index 080d45eb2bfbf..1b99463327b4d 100644 --- a/test/CodeGen/SystemZ/fp-div-01.ll +++ b/test/CodeGen/SystemZ/fp-div-01.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare float @foo() + ; Check register division. define float @f1(float %f1, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: debr %f0, %f2 ; CHECK: br %r14 %res = fdiv float %f1, %f2 @@ -13,7 +15,7 @@ define float @f1(float %f1, float %f2) { ; Check the low end of the DEB range. define float @f2(float %f1, float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: deb %f0, 0(%r2) ; CHECK: br %r14 %f2 = load float *%ptr @@ -23,7 +25,7 @@ define float @f2(float %f1, float *%ptr) { ; Check the high end of the aligned DEB range. define float @f3(float %f1, float *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: deb %f0, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1023 @@ -35,7 +37,7 @@ define float @f3(float %f1, float *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define float @f4(float %f1, float *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: deb %f0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +49,7 @@ define float @f4(float %f1, float *%base) { ; Check negative displacements, which also need separate address logic. define float @f5(float %f1, float *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: deb %f0, 0(%r2) ; CHECK: br %r14 @@ -59,7 +61,7 @@ define float @f5(float %f1, float *%base) { ; Check that DEB allows indices. 
define float @f6(float %f1, float *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: deb %f0, 400(%r1,%r2) ; CHECK: br %r14 @@ -69,3 +71,49 @@ define float @f6(float %f1, float *%base, i64 %index) { %res = fdiv float %f1, %f2 ret float %res } + +; Check that divisions of spilled values can use DEB rather than DEBR. +define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: deb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float *%ptr0, i64 2 + %ptr2 = getelementptr float *%ptr0, i64 4 + %ptr3 = getelementptr float *%ptr0, i64 6 + %ptr4 = getelementptr float *%ptr0, i64 8 + %ptr5 = getelementptr float *%ptr0, i64 10 + %ptr6 = getelementptr float *%ptr0, i64 12 + %ptr7 = getelementptr float *%ptr0, i64 14 + %ptr8 = getelementptr float *%ptr0, i64 16 + %ptr9 = getelementptr float *%ptr0, i64 18 + %ptr10 = getelementptr float *%ptr0, i64 20 + + %val0 = load float *%ptr0 + %val1 = load float *%ptr1 + %val2 = load float *%ptr2 + %val3 = load float *%ptr3 + %val4 = load float *%ptr4 + %val5 = load float *%ptr5 + %val6 = load float *%ptr6 + %val7 = load float *%ptr7 + %val8 = load float *%ptr8 + %val9 = load float *%ptr9 + %val10 = load float *%ptr10 + + %ret = call float @foo() + + %div0 = fdiv float %ret, %val0 + %div1 = fdiv float %div0, %val1 + %div2 = fdiv float %div1, %val2 + %div3 = fdiv float %div2, %val3 + %div4 = fdiv float %div3, %val4 + %div5 = fdiv float %div4, %val5 + %div6 = fdiv float %div5, %val6 + %div7 = fdiv float %div6, %val7 + %div8 = fdiv float %div7, %val8 + %div9 = fdiv float %div8, %val9 + %div10 = fdiv float %div9, %val10 + + ret float %div10 +} diff --git a/test/CodeGen/SystemZ/fp-div-02.ll b/test/CodeGen/SystemZ/fp-div-02.ll index c5cae15a824b2..513664bd94964 100644 --- a/test/CodeGen/SystemZ/fp-div-02.ll +++ b/test/CodeGen/SystemZ/fp-div-02.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare double @foo() + ; Check 
register division. define double @f1(double %f1, double %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ddbr %f0, %f2 ; CHECK: br %r14 %res = fdiv double %f1, %f2 @@ -13,7 +15,7 @@ define double @f1(double %f1, double %f2) { ; Check the low end of the DDB range. define double @f2(double %f1, double *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ddb %f0, 0(%r2) ; CHECK: br %r14 %f2 = load double *%ptr @@ -23,7 +25,7 @@ define double @f2(double %f1, double *%ptr) { ; Check the high end of the aligned DDB range. define double @f3(double %f1, double *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ddb %f0, 4088(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 511 @@ -35,7 +37,7 @@ define double @f3(double %f1, double *%base) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f4(double %f1, double *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: ddb %f0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +49,7 @@ define double @f4(double %f1, double *%base) { ; Check negative displacements, which also need separate address logic. define double @f5(double %f1, double *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -8 ; CHECK: ddb %f0, 0(%r2) ; CHECK: br %r14 @@ -59,7 +61,7 @@ define double @f5(double %f1, double *%base) { ; Check that DDB allows indices. define double @f6(double %f1, double *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 3 ; CHECK: ddb %f0, 800(%r1,%r2) ; CHECK: br %r14 @@ -69,3 +71,49 @@ define double @f6(double %f1, double *%base, i64 %index) { %res = fdiv double %f1, %f2 ret double %res } + +; Check that divisions of spilled values can use DDB rather than DDBR. 
+define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: ddb %f0, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 + %ptr3 = getelementptr double *%ptr0, i64 6 + %ptr4 = getelementptr double *%ptr0, i64 8 + %ptr5 = getelementptr double *%ptr0, i64 10 + %ptr6 = getelementptr double *%ptr0, i64 12 + %ptr7 = getelementptr double *%ptr0, i64 14 + %ptr8 = getelementptr double *%ptr0, i64 16 + %ptr9 = getelementptr double *%ptr0, i64 18 + %ptr10 = getelementptr double *%ptr0, i64 20 + + %val0 = load double *%ptr0 + %val1 = load double *%ptr1 + %val2 = load double *%ptr2 + %val3 = load double *%ptr3 + %val4 = load double *%ptr4 + %val5 = load double *%ptr5 + %val6 = load double *%ptr6 + %val7 = load double *%ptr7 + %val8 = load double *%ptr8 + %val9 = load double *%ptr9 + %val10 = load double *%ptr10 + + %ret = call double @foo() + + %div0 = fdiv double %ret, %val0 + %div1 = fdiv double %div0, %val1 + %div2 = fdiv double %div1, %val2 + %div3 = fdiv double %div2, %val3 + %div4 = fdiv double %div3, %val4 + %div5 = fdiv double %div4, %val5 + %div6 = fdiv double %div5, %val6 + %div7 = fdiv double %div6, %val7 + %div8 = fdiv double %div7, %val8 + %div9 = fdiv double %div8, %val9 + %div10 = fdiv double %div9, %val10 + + ret double %div10 +} diff --git a/test/CodeGen/SystemZ/fp-div-03.ll b/test/CodeGen/SystemZ/fp-div-03.ll index 18f2d7449a80f..079b349b4084a 100644 --- a/test/CodeGen/SystemZ/fp-div-03.ll +++ b/test/CodeGen/SystemZ/fp-div-03.ll @@ -4,7 +4,7 @@ ; There is no memory form of 128-bit division. 
define void @f1(fp128 *%ptr, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lxebr %f0, %f0 ; CHECK: ld %f1, 0(%r2) ; CHECK: ld %f3, 8(%r2) diff --git a/test/CodeGen/SystemZ/fp-move-01.ll b/test/CodeGen/SystemZ/fp-move-01.ll index 73cd978c5975e..d16502f2f7c8d 100644 --- a/test/CodeGen/SystemZ/fp-move-01.ll +++ b/test/CodeGen/SystemZ/fp-move-01.ll @@ -4,14 +4,14 @@ ; Test f32 moves. define float @f1(float %a, float %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ler %f0, %f2 ret float %b } ; Test f64 moves. define double @f2(double %a, double %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ldr %f0, %f2 ret double %b } @@ -19,7 +19,7 @@ define double @f2(double %a, double %b) { ; Test f128 moves. Since f128s are passed by reference, we need to force ; a copy by other means. define void @f3(fp128 *%x) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lxr ; CHECK: axbr %val = load volatile fp128 *%x diff --git a/test/CodeGen/SystemZ/fp-move-02.ll b/test/CodeGen/SystemZ/fp-move-02.ll index 9d87797c8f924..505ee8d37a4eb 100644 --- a/test/CodeGen/SystemZ/fp-move-02.ll +++ b/test/CodeGen/SystemZ/fp-move-02.ll @@ -1,11 +1,17 @@ -; Test moves between FPRs and GPRs. +; Test moves between FPRs and GPRs. The 32-bit cases test the z10 +; implementation, which has no high-word support. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare i64 @foo() +declare double @bar() +@dptr = external global double +@iptr = external global i64 ; Test 32-bit moves from GPRs to FPRs. The GPR must be moved into the high ; 32 bits of the FPR. define float @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 32 ; CHECK: ldgr %f0, [[REGISTER]] %res = bitcast i32 %a to float @@ -15,8 +21,8 @@ define float @f1(i32 %a) { ; Like f1, but create a situation where the shift can be folded with ; surrounding code. 
define float @f2(i64 %big) { -; CHECK: f2: -; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 31 +; CHECK-LABEL: f2: +; CHECK: risbg [[REGISTER:%r[0-5]]], %r2, 0, 159, 31 ; CHECK: ldgr %f0, [[REGISTER]] %shift = lshr i64 %big, 1 %a = trunc i64 %shift to i32 @@ -26,8 +32,8 @@ define float @f2(i64 %big) { ; Another example of the same thing. define float @f3(i64 %big) { -; CHECK: f3: -; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 2 +; CHECK-LABEL: f3: +; CHECK: risbg [[REGISTER:%r[0-5]]], %r2, 0, 159, 2 ; CHECK: ldgr %f0, [[REGISTER]] %shift = ashr i64 %big, 30 %a = trunc i64 %shift to i32 @@ -37,7 +43,7 @@ define float @f3(i64 %big) { ; Like f1, but the value to transfer is already in the high 32 bits. define float @f4(i64 %big) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: %r2 ; CHECK: nilf %r2, 0 ; CHECK-NOT: %r2 @@ -50,7 +56,7 @@ define float @f4(i64 %big) { ; Test 64-bit moves from GPRs to FPRs. define double @f5(i64 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: ldgr %f0, %r2 %res = bitcast i64 %a to double ret double %res @@ -59,11 +65,12 @@ define double @f5(i64 %a) { ; Test 128-bit moves from GPRs to FPRs. i128 isn't a legitimate type, ; so this goes through memory. define void @f6(fp128 *%a, i128 *%b) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lg ; CHECK: lg ; CHECK: stg ; CHECK: stg +; CHECK: br %r14 %val = load i128 *%b %res = bitcast i128 %val to fp128 store fp128 %res, fp128 *%a @@ -73,7 +80,7 @@ define void @f6(fp128 *%a, i128 *%b) { ; Test 32-bit moves from FPRs to GPRs. The high 32 bits of the FPR should ; be moved into the low 32 bits of the GPR. define i32 @f7(float %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lgdr [[REGISTER:%r[0-5]]], %f0 ; CHECK: srlg %r2, [[REGISTER]], 32 %res = bitcast float %a to i32 @@ -82,7 +89,7 @@ define i32 @f7(float %a) { ; Test 64-bit moves from FPRs to GPRs. 
define i64 @f8(double %a) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: lgdr %r2, %f0 %res = bitcast double %a to i64 ret i64 %res @@ -90,7 +97,7 @@ define i64 @f8(double %a) { ; Test 128-bit moves from FPRs to GPRs, with the same restriction as f6. define void @f9(fp128 *%a, i128 *%b) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ld ; CHECK: ld ; CHECK: std @@ -101,3 +108,286 @@ define void @f9(fp128 *%a, i128 *%b) { ret void } +; Test cases where the destination of an LGDR needs to be spilled. +; We shouldn't have any integer stack stores or floating-point loads. +define void @f10(double %extra) { +; CHECK-LABEL: f10: +; CHECK: dptr +; CHECK-NOT: stg {{.*}}(%r15) +; CHECK: %loop +; CHECK-NOT: ld {{.*}}(%r15) +; CHECK: %exit +; CHECK: br %r14 +entry: + %double0 = load volatile double *@dptr + %biased0 = fadd double %double0, %extra + %int0 = bitcast double %biased0 to i64 + %double1 = load volatile double *@dptr + %biased1 = fadd double %double1, %extra + %int1 = bitcast double %biased1 to i64 + %double2 = load volatile double *@dptr + %biased2 = fadd double %double2, %extra + %int2 = bitcast double %biased2 to i64 + %double3 = load volatile double *@dptr + %biased3 = fadd double %double3, %extra + %int3 = bitcast double %biased3 to i64 + %double4 = load volatile double *@dptr + %biased4 = fadd double %double4, %extra + %int4 = bitcast double %biased4 to i64 + %double5 = load volatile double *@dptr + %biased5 = fadd double %double5, %extra + %int5 = bitcast double %biased5 to i64 + %double6 = load volatile double *@dptr + %biased6 = fadd double %double6, %extra + %int6 = bitcast double %biased6 to i64 + %double7 = load volatile double *@dptr + %biased7 = fadd double %double7, %extra + %int7 = bitcast double %biased7 to i64 + %double8 = load volatile double *@dptr + %biased8 = fadd double %double8, %extra + %int8 = bitcast double %biased8 to i64 + %double9 = load volatile double *@dptr + %biased9 = fadd double %double9, %extra + %int9 = bitcast double %biased9 to i64 
+ br label %loop + +loop: + %start = call i64 @foo() + %or0 = or i64 %start, %int0 + %or1 = or i64 %or0, %int1 + %or2 = or i64 %or1, %int2 + %or3 = or i64 %or2, %int3 + %or4 = or i64 %or3, %int4 + %or5 = or i64 %or4, %int5 + %or6 = or i64 %or5, %int6 + %or7 = or i64 %or6, %int7 + %or8 = or i64 %or7, %int8 + %or9 = or i64 %or8, %int9 + store i64 %or9, i64 *@iptr + %cont = icmp ne i64 %start, 1 + br i1 %cont, label %loop, label %exit + +exit: + ret void +} + +; ...likewise LDGR, with the requirements the other way around. +define void @f11(i64 %mask) { +; CHECK-LABEL: f11: +; CHECK: iptr +; CHECK-NOT: std {{.*}}(%r15) +; CHECK: %loop +; CHECK-NOT: lg {{.*}}(%r15) +; CHECK: %exit +; CHECK: br %r14 +entry: + %int0 = load volatile i64 *@iptr + %masked0 = and i64 %int0, %mask + %double0 = bitcast i64 %masked0 to double + %int1 = load volatile i64 *@iptr + %masked1 = and i64 %int1, %mask + %double1 = bitcast i64 %masked1 to double + %int2 = load volatile i64 *@iptr + %masked2 = and i64 %int2, %mask + %double2 = bitcast i64 %masked2 to double + %int3 = load volatile i64 *@iptr + %masked3 = and i64 %int3, %mask + %double3 = bitcast i64 %masked3 to double + %int4 = load volatile i64 *@iptr + %masked4 = and i64 %int4, %mask + %double4 = bitcast i64 %masked4 to double + %int5 = load volatile i64 *@iptr + %masked5 = and i64 %int5, %mask + %double5 = bitcast i64 %masked5 to double + %int6 = load volatile i64 *@iptr + %masked6 = and i64 %int6, %mask + %double6 = bitcast i64 %masked6 to double + %int7 = load volatile i64 *@iptr + %masked7 = and i64 %int7, %mask + %double7 = bitcast i64 %masked7 to double + %int8 = load volatile i64 *@iptr + %masked8 = and i64 %int8, %mask + %double8 = bitcast i64 %masked8 to double + %int9 = load volatile i64 *@iptr + %masked9 = and i64 %int9, %mask + %double9 = bitcast i64 %masked9 to double + br label %loop + +loop: + %start = call double @bar() + %add0 = fadd double %start, %double0 + %add1 = fadd double %add0, %double1 + %add2 = fadd double 
%add1, %double2 + %add3 = fadd double %add2, %double3 + %add4 = fadd double %add3, %double4 + %add5 = fadd double %add4, %double5 + %add6 = fadd double %add5, %double6 + %add7 = fadd double %add6, %double7 + %add8 = fadd double %add7, %double8 + %add9 = fadd double %add8, %double9 + store double %add9, double *@dptr + %cont = fcmp one double %start, 1.0 + br i1 %cont, label %loop, label %exit + +exit: + ret void +} + +; Test cases where the source of an LDGR needs to be spilled. +; We shouldn't have any integer stack stores or floating-point loads. +define void @f12() { +; CHECK-LABEL: f12: +; CHECK: %loop +; CHECK-NOT: std {{.*}}(%r15) +; CHECK: %exit +; CHECK: foo@PLT +; CHECK-NOT: lg {{.*}}(%r15) +; CHECK: foo@PLT +; CHECK: br %r14 +entry: + br label %loop + +loop: + %int0 = phi i64 [ 0, %entry ], [ %add0, %loop ] + %int1 = phi i64 [ 0, %entry ], [ %add1, %loop ] + %int2 = phi i64 [ 0, %entry ], [ %add2, %loop ] + %int3 = phi i64 [ 0, %entry ], [ %add3, %loop ] + %int4 = phi i64 [ 0, %entry ], [ %add4, %loop ] + %int5 = phi i64 [ 0, %entry ], [ %add5, %loop ] + %int6 = phi i64 [ 0, %entry ], [ %add6, %loop ] + %int7 = phi i64 [ 0, %entry ], [ %add7, %loop ] + %int8 = phi i64 [ 0, %entry ], [ %add8, %loop ] + %int9 = phi i64 [ 0, %entry ], [ %add9, %loop ] + + %bias = call i64 @foo() + %add0 = add i64 %int0, %bias + %add1 = add i64 %int1, %bias + %add2 = add i64 %int2, %bias + %add3 = add i64 %int3, %bias + %add4 = add i64 %int4, %bias + %add5 = add i64 %int5, %bias + %add6 = add i64 %int6, %bias + %add7 = add i64 %int7, %bias + %add8 = add i64 %int8, %bias + %add9 = add i64 %int9, %bias + %cont = icmp ne i64 %bias, 1 + br i1 %cont, label %loop, label %exit + +exit: + %unused1 = call i64 @foo() + %factor = load volatile double *@dptr + + %conv0 = bitcast i64 %add0 to double + %mul0 = fmul double %conv0, %factor + store volatile double %mul0, double *@dptr + %conv1 = bitcast i64 %add1 to double + %mul1 = fmul double %conv1, %factor + store volatile double %mul1, 
double *@dptr + %conv2 = bitcast i64 %add2 to double + %mul2 = fmul double %conv2, %factor + store volatile double %mul2, double *@dptr + %conv3 = bitcast i64 %add3 to double + %mul3 = fmul double %conv3, %factor + store volatile double %mul3, double *@dptr + %conv4 = bitcast i64 %add4 to double + %mul4 = fmul double %conv4, %factor + store volatile double %mul4, double *@dptr + %conv5 = bitcast i64 %add5 to double + %mul5 = fmul double %conv5, %factor + store volatile double %mul5, double *@dptr + %conv6 = bitcast i64 %add6 to double + %mul6 = fmul double %conv6, %factor + store volatile double %mul6, double *@dptr + %conv7 = bitcast i64 %add7 to double + %mul7 = fmul double %conv7, %factor + store volatile double %mul7, double *@dptr + %conv8 = bitcast i64 %add8 to double + %mul8 = fmul double %conv8, %factor + store volatile double %mul8, double *@dptr + %conv9 = bitcast i64 %add9 to double + %mul9 = fmul double %conv9, %factor + store volatile double %mul9, double *@dptr + + %unused2 = call i64 @foo() + + ret void +} + +; ...likewise LGDR, with the requirements the other way around. 
+define void @f13() { +; CHECK-LABEL: f13: +; CHECK: %loop +; CHECK-NOT: stg {{.*}}(%r15) +; CHECK: %exit +; CHECK: foo@PLT +; CHECK-NOT: ld {{.*}}(%r15) +; CHECK: foo@PLT +; CHECK: br %r14 +entry: + br label %loop + +loop: + %double0 = phi double [ 1.0, %entry ], [ %mul0, %loop ] + %double1 = phi double [ 1.0, %entry ], [ %mul1, %loop ] + %double2 = phi double [ 1.0, %entry ], [ %mul2, %loop ] + %double3 = phi double [ 1.0, %entry ], [ %mul3, %loop ] + %double4 = phi double [ 1.0, %entry ], [ %mul4, %loop ] + %double5 = phi double [ 1.0, %entry ], [ %mul5, %loop ] + %double6 = phi double [ 1.0, %entry ], [ %mul6, %loop ] + %double7 = phi double [ 1.0, %entry ], [ %mul7, %loop ] + %double8 = phi double [ 1.0, %entry ], [ %mul8, %loop ] + %double9 = phi double [ 1.0, %entry ], [ %mul9, %loop ] + + %factor = call double @bar() + %mul0 = fmul double %double0, %factor + %mul1 = fmul double %double1, %factor + %mul2 = fmul double %double2, %factor + %mul3 = fmul double %double3, %factor + %mul4 = fmul double %double4, %factor + %mul5 = fmul double %double5, %factor + %mul6 = fmul double %double6, %factor + %mul7 = fmul double %double7, %factor + %mul8 = fmul double %double8, %factor + %mul9 = fmul double %double9, %factor + %cont = fcmp one double %factor, 1.0 + br i1 %cont, label %loop, label %exit + +exit: + %unused1 = call i64 @foo() + %bias = load volatile i64 *@iptr + + %conv0 = bitcast double %mul0 to i64 + %add0 = add i64 %conv0, %bias + store volatile i64 %add0, i64 *@iptr + %conv1 = bitcast double %mul1 to i64 + %add1 = add i64 %conv1, %bias + store volatile i64 %add1, i64 *@iptr + %conv2 = bitcast double %mul2 to i64 + %add2 = add i64 %conv2, %bias + store volatile i64 %add2, i64 *@iptr + %conv3 = bitcast double %mul3 to i64 + %add3 = add i64 %conv3, %bias + store volatile i64 %add3, i64 *@iptr + %conv4 = bitcast double %mul4 to i64 + %add4 = add i64 %conv4, %bias + store volatile i64 %add4, i64 *@iptr + %conv5 = bitcast double %mul5 to i64 + %add5 = add i64 
%conv5, %bias + store volatile i64 %add5, i64 *@iptr + %conv6 = bitcast double %mul6 to i64 + %add6 = add i64 %conv6, %bias + store volatile i64 %add6, i64 *@iptr + %conv7 = bitcast double %mul7 to i64 + %add7 = add i64 %conv7, %bias + store volatile i64 %add7, i64 *@iptr + %conv8 = bitcast double %mul8 to i64 + %add8 = add i64 %conv8, %bias + store volatile i64 %add8, i64 *@iptr + %conv9 = bitcast double %mul9 to i64 + %add9 = add i64 %conv9, %bias + store volatile i64 %add9, i64 *@iptr + + %unused2 = call i64 @foo() + + ret void +} diff --git a/test/CodeGen/SystemZ/fp-move-03.ll b/test/CodeGen/SystemZ/fp-move-03.ll index 37dbdfad7b879..1273358f65add 100644 --- a/test/CodeGen/SystemZ/fp-move-03.ll +++ b/test/CodeGen/SystemZ/fp-move-03.ll @@ -4,7 +4,7 @@ ; Test the low end of the LE range. define float @f1(float *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: le %f0, 0(%r2) ; CHECK: br %r14 %val = load float *%src @@ -13,7 +13,7 @@ define float @f1(float *%src) { ; Test the high end of the LE range. define float @f2(float *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: le %f0, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 1023 @@ -23,7 +23,7 @@ define float @f2(float *%src) { ; Check the next word up, which should use LEY instead of LE. define float @f3(float *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ley %f0, 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 1024 @@ -33,7 +33,7 @@ define float @f3(float *%src) { ; Check the high end of the aligned LEY range. define float @f4(float *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ley %f0, 524284(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 131071 @@ -44,7 +44,7 @@ define float @f4(float *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. 
define float @f5(float *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r2, 524288 ; CHECK: le %f0, 0(%r2) ; CHECK: br %r14 @@ -55,7 +55,7 @@ define float @f5(float *%src) { ; Check the high end of the negative aligned LEY range. define float @f6(float *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ley %f0, -4(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 -1 @@ -65,7 +65,7 @@ define float @f6(float *%src) { ; Check the low end of the LEY range. define float @f7(float *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ley %f0, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 -131072 @@ -76,7 +76,7 @@ define float @f7(float *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define float @f8(float *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, -524292 ; CHECK: le %f0, 0(%r2) ; CHECK: br %r14 @@ -87,7 +87,7 @@ define float @f8(float *%src) { ; Check that LE allows an index. define float @f9(i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: le %f0, 4092({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -99,7 +99,7 @@ define float @f9(i64 %src, i64 %index) { ; Check that LEY allows an index. define float @f10(i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/fp-move-04.ll b/test/CodeGen/SystemZ/fp-move-04.ll index 72e90d1fffd1c..1b0278fdee0ff 100644 --- a/test/CodeGen/SystemZ/fp-move-04.ll +++ b/test/CodeGen/SystemZ/fp-move-04.ll @@ -4,7 +4,7 @@ ; Test the low end of the LD range. define double @f1(double *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ld %f0, 0(%r2) ; CHECK: br %r14 %val = load double *%src @@ -13,7 +13,7 @@ define double @f1(double *%src) { ; Test the high end of the LD range. 
define double @f2(double *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ld %f0, 4088(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 511 @@ -23,7 +23,7 @@ define double @f2(double *%src) { ; Check the next doubleword up, which should use LDY instead of LD. define double @f3(double *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ldy %f0, 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 512 @@ -33,7 +33,7 @@ define double @f3(double *%src) { ; Check the high end of the aligned LDY range. define double @f4(double *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ldy %f0, 524280(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 65535 @@ -44,7 +44,7 @@ define double @f4(double *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f5(double *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r2, 524288 ; CHECK: ld %f0, 0(%r2) ; CHECK: br %r14 @@ -55,7 +55,7 @@ define double @f5(double *%src) { ; Check the high end of the negative aligned LDY range. define double @f6(double *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ldy %f0, -8(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 -1 @@ -65,7 +65,7 @@ define double @f6(double *%src) { ; Check the low end of the LDY range. define double @f7(double *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ldy %f0, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 -65536 @@ -76,7 +76,7 @@ define double @f7(double *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f8(double *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, -524296 ; CHECK: ld %f0, 0(%r2) ; CHECK: br %r14 @@ -87,7 +87,7 @@ define double @f8(double *%src) { ; Check that LD allows an index. 
define double @f9(i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ld %f0, 4095({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -99,7 +99,7 @@ define double @f9(i64 %src, i64 %index) { ; Check that LDY allows an index. define double @f10(i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ldy %f0, 4096({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/fp-move-05.ll b/test/CodeGen/SystemZ/fp-move-05.ll index 66ad048fbed76..d302a0f9c6338 100644 --- a/test/CodeGen/SystemZ/fp-move-05.ll +++ b/test/CodeGen/SystemZ/fp-move-05.ll @@ -4,7 +4,7 @@ ; Check loads with no offset. define double @f1(i64 %src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ld %f0, 0(%r2) ; CHECK: ld %f2, 8(%r2) ; CHECK: br %r14 @@ -16,7 +16,7 @@ define double @f1(i64 %src) { ; Check the highest aligned offset that allows LD for both halves. define double @f2(i64 %src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ld %f0, 4080(%r2) ; CHECK: ld %f2, 4088(%r2) ; CHECK: br %r14 @@ -29,7 +29,7 @@ define double @f2(i64 %src) { ; Check the next doubleword up, which requires a mixture of LD and LDY. define double @f3(i64 %src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ld %f0, 4088(%r2) ; CHECK: ldy %f2, 4096(%r2) ; CHECK: br %r14 @@ -42,7 +42,7 @@ define double @f3(i64 %src) { ; Check the next doubleword after that, which requires LDY for both halves. define double @f4(i64 %src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ldy %f0, 4096(%r2) ; CHECK: ldy %f2, 4104(%r2) ; CHECK: br %r14 @@ -55,7 +55,7 @@ define double @f4(i64 %src) { ; Check the highest aligned offset that allows LDY for both halves. define double @f5(i64 %src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: ldy %f0, 524272(%r2) ; CHECK: ldy %f2, 524280(%r2) ; CHECK: br %r14 @@ -69,7 +69,7 @@ define double @f5(i64 %src) { ; Check the next doubleword up, which requires separate address logic. 
; Other sequences besides this one would be OK. define double @f6(i64 %src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lay %r1, 524280(%r2) ; CHECK: ld %f0, 0(%r1) ; CHECK: ld %f2, 8(%r1) @@ -84,7 +84,7 @@ define double @f6(i64 %src) { ; Check the highest aligned negative offset, which needs a combination of ; LDY and LD. define double @f7(i64 %src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ldy %f0, -8(%r2) ; CHECK: ld %f2, 0(%r2) ; CHECK: br %r14 @@ -97,7 +97,7 @@ define double @f7(i64 %src) { ; Check the next doubleword down, which requires LDY for both halves. define double @f8(i64 %src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: ldy %f0, -16(%r2) ; CHECK: ldy %f2, -8(%r2) ; CHECK: br %r14 @@ -110,7 +110,7 @@ define double @f8(i64 %src) { ; Check the lowest offset that allows LDY for both halves. define double @f9(i64 %src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ldy %f0, -524288(%r2) ; CHECK: ldy %f2, -524280(%r2) ; CHECK: br %r14 @@ -124,7 +124,7 @@ define double @f9(i64 %src) { ; Check the next doubleword down, which requires separate address logic. ; Other sequences besides this one would be OK. define double @f10(i64 %src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r2, -524296 ; CHECK: ld %f0, 0(%r2) ; CHECK: ld %f2, 8(%r2) @@ -138,7 +138,7 @@ define double @f10(i64 %src) { ; Check that indices are allowed. define double @f11(i64 %src, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: ld %f0, 4088({{%r2,%r3|%r3,%r2}}) ; CHECK: ldy %f2, 4096({{%r2,%r3|%r3,%r2}}) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-move-06.ll b/test/CodeGen/SystemZ/fp-move-06.ll index b660c2ac223da..da67691729e36 100644 --- a/test/CodeGen/SystemZ/fp-move-06.ll +++ b/test/CodeGen/SystemZ/fp-move-06.ll @@ -4,7 +4,7 @@ ; Test the low end of the STE range. 
define void @f1(float *%ptr, float %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ste %f0, 0(%r2) ; CHECK: br %r14 store float %val, float *%ptr @@ -13,7 +13,7 @@ define void @f1(float *%ptr, float %val) { ; Test the high end of the STE range. define void @f2(float *%src, float %val) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ste %f0, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 1023 @@ -23,7 +23,7 @@ define void @f2(float *%src, float %val) { ; Check the next word up, which should use STEY instead of STE. define void @f3(float *%src, float %val) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: stey %f0, 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 1024 @@ -33,7 +33,7 @@ define void @f3(float *%src, float %val) { ; Check the high end of the aligned STEY range. define void @f4(float *%src, float %val) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: stey %f0, 524284(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 131071 @@ -44,7 +44,7 @@ define void @f4(float *%src, float %val) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f5(float *%src, float %val) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r2, 524288 ; CHECK: ste %f0, 0(%r2) ; CHECK: br %r14 @@ -55,7 +55,7 @@ define void @f5(float *%src, float %val) { ; Check the high end of the negative aligned STEY range. define void @f6(float *%src, float %val) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: stey %f0, -4(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 -1 @@ -65,7 +65,7 @@ define void @f6(float *%src, float %val) { ; Check the low end of the STEY range. define void @f7(float *%src, float %val) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: stey %f0, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%src, i64 -131072 @@ -76,7 +76,7 @@ define void @f7(float *%src, float %val) { ; Check the next word down, which needs separate address logic. 
; Other sequences besides this one would be OK. define void @f8(float *%src, float %val) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, -524292 ; CHECK: ste %f0, 0(%r2) ; CHECK: br %r14 @@ -87,7 +87,7 @@ define void @f8(float *%src, float %val) { ; Check that STE allows an index. define void @f9(i64 %src, i64 %index, float %val) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ste %f0, 4092({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -99,7 +99,7 @@ define void @f9(i64 %src, i64 %index, float %val) { ; Check that STEY allows an index. define void @f10(i64 %src, i64 %index, float %val) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: stey %f0, 4096({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/fp-move-07.ll b/test/CodeGen/SystemZ/fp-move-07.ll index 0cb0474157d01..a4f1820d1204e 100644 --- a/test/CodeGen/SystemZ/fp-move-07.ll +++ b/test/CodeGen/SystemZ/fp-move-07.ll @@ -4,7 +4,7 @@ ; Test the low end of the STD range. define void @f1(double *%src, double %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: std %f0, 0(%r2) ; CHECK: br %r14 store double %val, double *%src @@ -13,7 +13,7 @@ define void @f1(double *%src, double %val) { ; Test the high end of the STD range. define void @f2(double *%src, double %val) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: std %f0, 4088(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 511 @@ -23,7 +23,7 @@ define void @f2(double *%src, double %val) { ; Check the next doubleword up, which should use STDY instead of STD. define void @f3(double *%src, double %val) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: stdy %f0, 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 512 @@ -33,7 +33,7 @@ define void @f3(double *%src, double %val) { ; Check the high end of the aligned STDY range. 
define void @f4(double *%src, double %val) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: stdy %f0, 524280(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 65535 @@ -44,7 +44,7 @@ define void @f4(double *%src, double %val) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f5(double *%src, double %val) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r2, 524288 ; CHECK: std %f0, 0(%r2) ; CHECK: br %r14 @@ -55,7 +55,7 @@ define void @f5(double *%src, double %val) { ; Check the high end of the negative aligned STDY range. define void @f6(double *%src, double %val) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: stdy %f0, -8(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 -1 @@ -65,7 +65,7 @@ define void @f6(double *%src, double %val) { ; Check the low end of the STDY range. define void @f7(double *%src, double %val) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: stdy %f0, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%src, i64 -65536 @@ -76,7 +76,7 @@ define void @f7(double *%src, double %val) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f8(double *%src, double %val) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, -524296 ; CHECK: std %f0, 0(%r2) ; CHECK: br %r14 @@ -87,7 +87,7 @@ define void @f8(double *%src, double %val) { ; Check that STD allows an index. define void @f9(i64 %src, i64 %index, double %val) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: std %f0, 4095({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -99,7 +99,7 @@ define void @f9(i64 %src, i64 %index, double %val) { ; Check that STDY allows an index. 
define void @f10(i64 %src, i64 %index, double %val) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: stdy %f0, 4096({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/fp-move-08.ll b/test/CodeGen/SystemZ/fp-move-08.ll index 448d2ace1762b..88038abc0daba 100644 --- a/test/CodeGen/SystemZ/fp-move-08.ll +++ b/test/CodeGen/SystemZ/fp-move-08.ll @@ -4,7 +4,7 @@ ; Check stores with no offset. define void @f1(i64 %src, double %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) ; CHECK: br %r14 @@ -16,7 +16,7 @@ define void @f1(i64 %src, double %val) { ; Check the highest aligned offset that allows STD for both halves. define void @f2(i64 %src, double %val) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: std %f0, 4080(%r2) ; CHECK: std %f2, 4088(%r2) ; CHECK: br %r14 @@ -29,7 +29,7 @@ define void @f2(i64 %src, double %val) { ; Check the next doubleword up, which requires a mixture of STD and STDY. define void @f3(i64 %src, double %val) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: std %f0, 4088(%r2) ; CHECK: stdy %f2, 4096(%r2) ; CHECK: br %r14 @@ -42,7 +42,7 @@ define void @f3(i64 %src, double %val) { ; Check the next doubleword after that, which requires STDY for both halves. define void @f4(i64 %src, double %val) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: stdy %f0, 4096(%r2) ; CHECK: stdy %f2, 4104(%r2) ; CHECK: br %r14 @@ -55,7 +55,7 @@ define void @f4(i64 %src, double %val) { ; Check the highest aligned offset that allows STDY for both halves. define void @f5(i64 %src, double %val) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: stdy %f0, 524272(%r2) ; CHECK: stdy %f2, 524280(%r2) ; CHECK: br %r14 @@ -69,7 +69,7 @@ define void @f5(i64 %src, double %val) { ; Check the next doubleword up, which requires separate address logic. ; Other sequences besides this one would be OK. 
define void @f6(i64 %src, double %val) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lay %r1, 524280(%r2) ; CHECK: std %f0, 0(%r1) ; CHECK: std %f2, 8(%r1) @@ -84,7 +84,7 @@ define void @f6(i64 %src, double %val) { ; Check the highest aligned negative offset, which needs a combination of ; STDY and STD. define void @f7(i64 %src, double %val) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: stdy %f0, -8(%r2) ; CHECK: std %f2, 0(%r2) ; CHECK: br %r14 @@ -97,7 +97,7 @@ define void @f7(i64 %src, double %val) { ; Check the next doubleword down, which requires STDY for both halves. define void @f8(i64 %src, double %val) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: stdy %f0, -16(%r2) ; CHECK: stdy %f2, -8(%r2) ; CHECK: br %r14 @@ -110,7 +110,7 @@ define void @f8(i64 %src, double %val) { ; Check the lowest offset that allows STDY for both halves. define void @f9(i64 %src, double %val) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: stdy %f0, -524288(%r2) ; CHECK: stdy %f2, -524280(%r2) ; CHECK: br %r14 @@ -124,7 +124,7 @@ define void @f9(i64 %src, double %val) { ; Check the next doubleword down, which requires separate address logic. ; Other sequences besides this one would be OK. define void @f10(i64 %src, double %val) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r2, -524296 ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) @@ -138,7 +138,7 @@ define void @f10(i64 %src, double %val) { ; Check that indices are allowed. define void @f11(i64 %src, i64 %index, double %val) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: std %f0, 4088({{%r2,%r3|%r3,%r2}}) ; CHECK: stdy %f2, 4096({{%r2,%r3|%r3,%r2}}) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-move-09.ll b/test/CodeGen/SystemZ/fp-move-09.ll new file mode 100644 index 0000000000000..52b2ee2e31abe --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-09.ll @@ -0,0 +1,62 @@ +; Test moves between FPRs and GPRs for z196 and above. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check that moves from i32s to floats can use high registers. +define float @f1(i16 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: llhh [[REG:%r[0-5]]], 0(%r2) +; CHECK: oihh [[REG]], 16256 +; CHECK: ldgr %f0, [[REG]] +; CHECK: br %r14 + %base = load i16 *%ptr + %ext = zext i16 %base to i32 + %full = or i32 %ext, 1065353216 + %res = bitcast i32 %full to float + ret float %res +} + +; Check that moves from floats to i32s can use high registers. +; This "store the low byte" technique is used by llvmpipe, for example. +define void @f2(float %val, i8 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: lgdr [[REG:%r[0-5]]], %f0 +; CHECK: stch [[REG]], 0(%r2) +; CHECK: br %r14 + %res = bitcast float %val to i32 + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Like f2, but with a conditional store. +define void @f3(float %val, i8 *%ptr, i32 %which) { +; CHECK-LABEL: f3: +; CHECK: cijlh %r3, 0, +; CHECK: lgdr [[REG:%r[0-5]]], %f0 +; CHECK: stch [[REG]], 0(%r2) +; CHECK: br %r14 + %int = bitcast float %val to i32 + %trunc = trunc i32 %int to i8 + %old = load i8 *%ptr + %cmp = icmp eq i32 %which, 0 + %res = select i1 %cmp, i8 %trunc, i8 %old + store i8 %res, i8 *%ptr + ret void +} + +; ...and again with 16-bit memory. 
+define void @f4(float %val, i16 *%ptr, i32 %which) { +; CHECK-LABEL: f4: +; CHECK: cijlh %r3, 0, +; CHECK: lgdr [[REG:%r[0-5]]], %f0 +; CHECK: sthh [[REG]], 0(%r2) +; CHECK: br %r14 + %int = bitcast float %val to i32 + %trunc = trunc i32 %int to i16 + %old = load i16 *%ptr + %cmp = icmp eq i32 %which, 0 + %res = select i1 %cmp, i16 %trunc, i16 %old + store i16 %res, i16 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-mul-01.ll b/test/CodeGen/SystemZ/fp-mul-01.ll index 68c78ee2da6b0..7562d6bf071bf 100644 --- a/test/CodeGen/SystemZ/fp-mul-01.ll +++ b/test/CodeGen/SystemZ/fp-mul-01.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare float @foo() + ; Check register multiplication. define float @f1(float %f1, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: meebr %f0, %f2 ; CHECK: br %r14 %res = fmul float %f1, %f2 @@ -13,7 +15,7 @@ define float @f1(float %f1, float %f2) { ; Check the low end of the MEEB range. define float @f2(float %f1, float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: meeb %f0, 0(%r2) ; CHECK: br %r14 %f2 = load float *%ptr @@ -23,7 +25,7 @@ define float @f2(float %f1, float *%ptr) { ; Check the high end of the aligned MEEB range. define float @f3(float %f1, float *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: meeb %f0, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1023 @@ -35,7 +37,7 @@ define float @f3(float %f1, float *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define float @f4(float %f1, float *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: meeb %f0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +49,7 @@ define float @f4(float %f1, float *%base) { ; Check negative displacements, which also need separate address logic. 
define float @f5(float %f1, float *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: meeb %f0, 0(%r2) ; CHECK: br %r14 @@ -59,7 +61,7 @@ define float @f5(float %f1, float *%base) { ; Check that MEEB allows indices. define float @f6(float %f1, float *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: meeb %f0, 400(%r1,%r2) ; CHECK: br %r14 @@ -69,3 +71,49 @@ define float @f6(float %f1, float *%base, i64 %index) { %res = fmul float %f1, %f2 ret float %res } + +; Check that multiplications of spilled values can use MEEB rather than MEEBR. +define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: meeb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float *%ptr0, i64 2 + %ptr2 = getelementptr float *%ptr0, i64 4 + %ptr3 = getelementptr float *%ptr0, i64 6 + %ptr4 = getelementptr float *%ptr0, i64 8 + %ptr5 = getelementptr float *%ptr0, i64 10 + %ptr6 = getelementptr float *%ptr0, i64 12 + %ptr7 = getelementptr float *%ptr0, i64 14 + %ptr8 = getelementptr float *%ptr0, i64 16 + %ptr9 = getelementptr float *%ptr0, i64 18 + %ptr10 = getelementptr float *%ptr0, i64 20 + + %val0 = load float *%ptr0 + %val1 = load float *%ptr1 + %val2 = load float *%ptr2 + %val3 = load float *%ptr3 + %val4 = load float *%ptr4 + %val5 = load float *%ptr5 + %val6 = load float *%ptr6 + %val7 = load float *%ptr7 + %val8 = load float *%ptr8 + %val9 = load float *%ptr9 + %val10 = load float *%ptr10 + + %ret = call float @foo() + + %mul0 = fmul float %ret, %val0 + %mul1 = fmul float %mul0, %val1 + %mul2 = fmul float %mul1, %val2 + %mul3 = fmul float %mul2, %val3 + %mul4 = fmul float %mul3, %val4 + %mul5 = fmul float %mul4, %val5 + %mul6 = fmul float %mul5, %val6 + %mul7 = fmul float %mul6, %val7 + %mul8 = fmul float %mul7, %val8 + %mul9 = fmul float %mul8, %val9 + %mul10 = fmul float %mul9, %val10 + + ret float %mul10 +} diff --git a/test/CodeGen/SystemZ/fp-mul-02.ll 
b/test/CodeGen/SystemZ/fp-mul-02.ll index ec51a4c1d679e..cf4448fd7dd10 100644 --- a/test/CodeGen/SystemZ/fp-mul-02.ll +++ b/test/CodeGen/SystemZ/fp-mul-02.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare float @foo() + ; Check register multiplication. define double @f1(float %f1, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: mdebr %f0, %f2 ; CHECK: br %r14 %f1x = fpext float %f1 to double @@ -15,7 +17,7 @@ define double @f1(float %f1, float %f2) { ; Check the low end of the MDEB range. define double @f2(float %f1, float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mdeb %f0, 0(%r2) ; CHECK: br %r14 %f2 = load float *%ptr @@ -27,7 +29,7 @@ define double @f2(float %f1, float *%ptr) { ; Check the high end of the aligned MDEB range. define double @f3(float %f1, float *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: mdeb %f0, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1023 @@ -41,7 +43,7 @@ define double @f3(float %f1, float *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f4(float %f1, float *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: mdeb %f0, 0(%r2) ; CHECK: br %r14 @@ -55,7 +57,7 @@ define double @f4(float %f1, float *%base) { ; Check negative displacements, which also need separate address logic. define double @f5(float %f1, float *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: mdeb %f0, 0(%r2) ; CHECK: br %r14 @@ -69,7 +71,7 @@ define double @f5(float %f1, float *%base) { ; Check that MDEB allows indices. 
define double @f6(float %f1, float *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: mdeb %f0, 400(%r1,%r2) ; CHECK: br %r14 @@ -81,3 +83,121 @@ define double @f6(float %f1, float *%base, i64 %index) { %res = fmul double %f1x, %f2x ret double %res } + +; Check that multiplications of spilled values can use MDEB rather than MDEBR. +define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: mdeb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float *%ptr0, i64 2 + %ptr2 = getelementptr float *%ptr0, i64 4 + %ptr3 = getelementptr float *%ptr0, i64 6 + %ptr4 = getelementptr float *%ptr0, i64 8 + %ptr5 = getelementptr float *%ptr0, i64 10 + %ptr6 = getelementptr float *%ptr0, i64 12 + %ptr7 = getelementptr float *%ptr0, i64 14 + %ptr8 = getelementptr float *%ptr0, i64 16 + %ptr9 = getelementptr float *%ptr0, i64 18 + %ptr10 = getelementptr float *%ptr0, i64 20 + + %val0 = load float *%ptr0 + %val1 = load float *%ptr1 + %val2 = load float *%ptr2 + %val3 = load float *%ptr3 + %val4 = load float *%ptr4 + %val5 = load float *%ptr5 + %val6 = load float *%ptr6 + %val7 = load float *%ptr7 + %val8 = load float *%ptr8 + %val9 = load float *%ptr9 + %val10 = load float *%ptr10 + + %frob0 = fadd float %val0, %val0 + %frob1 = fadd float %val1, %val1 + %frob2 = fadd float %val2, %val2 + %frob3 = fadd float %val3, %val3 + %frob4 = fadd float %val4, %val4 + %frob5 = fadd float %val5, %val5 + %frob6 = fadd float %val6, %val6 + %frob7 = fadd float %val7, %val7 + %frob8 = fadd float %val8, %val8 + %frob9 = fadd float %val9, %val9 + %frob10 = fadd float %val9, %val10 + + store float %frob0, float *%ptr0 + store float %frob1, float *%ptr1 + store float %frob2, float *%ptr2 + store float %frob3, float *%ptr3 + store float %frob4, float *%ptr4 + store float %frob5, float *%ptr5 + store float %frob6, float *%ptr6 + store float %frob7, float *%ptr7 + store float %frob8, float *%ptr8 + store float %frob9, 
float *%ptr9 + store float %frob10, float *%ptr10 + + %ret = call float @foo() + + %accext0 = fpext float %ret to double + %ext0 = fpext float %frob0 to double + %mul0 = fmul double %accext0, %ext0 + %extra0 = fmul double %mul0, 1.01 + %trunc0 = fptrunc double %extra0 to float + + %accext1 = fpext float %trunc0 to double + %ext1 = fpext float %frob1 to double + %mul1 = fmul double %accext1, %ext1 + %extra1 = fmul double %mul1, 1.11 + %trunc1 = fptrunc double %extra1 to float + + %accext2 = fpext float %trunc1 to double + %ext2 = fpext float %frob2 to double + %mul2 = fmul double %accext2, %ext2 + %extra2 = fmul double %mul2, 1.21 + %trunc2 = fptrunc double %extra2 to float + + %accext3 = fpext float %trunc2 to double + %ext3 = fpext float %frob3 to double + %mul3 = fmul double %accext3, %ext3 + %extra3 = fmul double %mul3, 1.31 + %trunc3 = fptrunc double %extra3 to float + + %accext4 = fpext float %trunc3 to double + %ext4 = fpext float %frob4 to double + %mul4 = fmul double %accext4, %ext4 + %extra4 = fmul double %mul4, 1.41 + %trunc4 = fptrunc double %extra4 to float + + %accext5 = fpext float %trunc4 to double + %ext5 = fpext float %frob5 to double + %mul5 = fmul double %accext5, %ext5 + %extra5 = fmul double %mul5, 1.51 + %trunc5 = fptrunc double %extra5 to float + + %accext6 = fpext float %trunc5 to double + %ext6 = fpext float %frob6 to double + %mul6 = fmul double %accext6, %ext6 + %extra6 = fmul double %mul6, 1.61 + %trunc6 = fptrunc double %extra6 to float + + %accext7 = fpext float %trunc6 to double + %ext7 = fpext float %frob7 to double + %mul7 = fmul double %accext7, %ext7 + %extra7 = fmul double %mul7, 1.71 + %trunc7 = fptrunc double %extra7 to float + + %accext8 = fpext float %trunc7 to double + %ext8 = fpext float %frob8 to double + %mul8 = fmul double %accext8, %ext8 + %extra8 = fmul double %mul8, 1.81 + %trunc8 = fptrunc double %extra8 to float + + %accext9 = fpext float %trunc8 to double + %ext9 = fpext float %frob9 to double + %mul9 = fmul double 
%accext9, %ext9 + %extra9 = fmul double %mul9, 1.91 + %trunc9 = fptrunc double %extra9 to float + + ret float %trunc9 +} diff --git a/test/CodeGen/SystemZ/fp-mul-03.ll b/test/CodeGen/SystemZ/fp-mul-03.ll index 9849247deccb0..6d296f07d1f29 100644 --- a/test/CodeGen/SystemZ/fp-mul-03.ll +++ b/test/CodeGen/SystemZ/fp-mul-03.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare double @foo() + ; Check register multiplication. define double @f1(double %f1, double %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: mdbr %f0, %f2 ; CHECK: br %r14 %res = fmul double %f1, %f2 @@ -13,7 +15,7 @@ define double @f1(double %f1, double %f2) { ; Check the low end of the MDB range. define double @f2(double %f1, double *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mdb %f0, 0(%r2) ; CHECK: br %r14 %f2 = load double *%ptr @@ -23,7 +25,7 @@ define double @f2(double %f1, double *%ptr) { ; Check the high end of the aligned MDB range. define double @f3(double %f1, double *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: mdb %f0, 4088(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 511 @@ -35,7 +37,7 @@ define double @f3(double %f1, double *%base) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f4(double %f1, double *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: mdb %f0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +49,7 @@ define double @f4(double %f1, double *%base) { ; Check negative displacements, which also need separate address logic. define double @f5(double %f1, double *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -8 ; CHECK: mdb %f0, 0(%r2) ; CHECK: br %r14 @@ -59,7 +61,7 @@ define double @f5(double %f1, double *%base) { ; Check that MDB allows indices. 
define double @f6(double %f1, double *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 3 ; CHECK: mdb %f0, 800(%r1,%r2) ; CHECK: br %r14 @@ -69,3 +71,49 @@ define double @f6(double %f1, double *%base, i64 %index) { %res = fmul double %f1, %f2 ret double %res } + +; Check that multiplications of spilled values can use MDB rather than MDBR. +define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: mdb %f0, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 + %ptr3 = getelementptr double *%ptr0, i64 6 + %ptr4 = getelementptr double *%ptr0, i64 8 + %ptr5 = getelementptr double *%ptr0, i64 10 + %ptr6 = getelementptr double *%ptr0, i64 12 + %ptr7 = getelementptr double *%ptr0, i64 14 + %ptr8 = getelementptr double *%ptr0, i64 16 + %ptr9 = getelementptr double *%ptr0, i64 18 + %ptr10 = getelementptr double *%ptr0, i64 20 + + %val0 = load double *%ptr0 + %val1 = load double *%ptr1 + %val2 = load double *%ptr2 + %val3 = load double *%ptr3 + %val4 = load double *%ptr4 + %val5 = load double *%ptr5 + %val6 = load double *%ptr6 + %val7 = load double *%ptr7 + %val8 = load double *%ptr8 + %val9 = load double *%ptr9 + %val10 = load double *%ptr10 + + %ret = call double @foo() + + %mul0 = fmul double %ret, %val0 + %mul1 = fmul double %mul0, %val1 + %mul2 = fmul double %mul1, %val2 + %mul3 = fmul double %mul2, %val3 + %mul4 = fmul double %mul3, %val4 + %mul5 = fmul double %mul4, %val5 + %mul6 = fmul double %mul5, %val6 + %mul7 = fmul double %mul6, %val7 + %mul8 = fmul double %mul7, %val8 + %mul9 = fmul double %mul8, %val9 + %mul10 = fmul double %mul9, %val10 + + ret double %mul10 +} diff --git a/test/CodeGen/SystemZ/fp-mul-04.ll b/test/CodeGen/SystemZ/fp-mul-04.ll index 712ead85cbd4b..3c4325e6cbbb4 100644 --- a/test/CodeGen/SystemZ/fp-mul-04.ll +++ b/test/CodeGen/SystemZ/fp-mul-04.ll @@ -2,11 +2,13 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | 
FileCheck %s +declare double @foo() + ; Check register multiplication. "mxdbr %f0, %f2" is not valid from LLVM's ; point of view, because %f2 is the low register of the FP128 %f0. Pass the ; multiplier in %f4 instead. define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: mxdbr %f0, %f4 ; CHECK: std %f0, 0(%r2) ; CHECK: std %f2, 8(%r2) @@ -20,7 +22,7 @@ define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) { ; Check the low end of the MXDB range. define void @f2(double %f1, double *%ptr, fp128 *%dst) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mxdb %f0, 0(%r2) ; CHECK: std %f0, 0(%r3) ; CHECK: std %f2, 8(%r3) @@ -35,7 +37,7 @@ define void @f2(double %f1, double *%ptr, fp128 *%dst) { ; Check the high end of the aligned MXDB range. define void @f3(double %f1, double *%base, fp128 *%dst) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: mxdb %f0, 4088(%r2) ; CHECK: std %f0, 0(%r3) ; CHECK: std %f2, 8(%r3) @@ -52,7 +54,7 @@ define void @f3(double %f1, double *%base, fp128 *%dst) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f4(double %f1, double *%base, fp128 *%dst) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: mxdb %f0, 0(%r2) ; CHECK: std %f0, 0(%r3) @@ -69,7 +71,7 @@ define void @f4(double %f1, double *%base, fp128 *%dst) { ; Check negative displacements, which also need separate address logic. define void @f5(double %f1, double *%base, fp128 *%dst) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -8 ; CHECK: mxdb %f0, 0(%r2) ; CHECK: std %f0, 0(%r3) @@ -86,7 +88,7 @@ define void @f5(double %f1, double *%base, fp128 *%dst) { ; Check that MXDB allows indices. 
define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 3 ; CHECK: mxdb %f0, 800(%r1,%r2) ; CHECK: std %f0, 0(%r4) @@ -101,3 +103,131 @@ define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) { store fp128 %res, fp128 *%dst ret void } + +; Check that multiplications of spilled values can use MXDB rather than MXDBR. +define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: mxdb %f0, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 + %ptr3 = getelementptr double *%ptr0, i64 6 + %ptr4 = getelementptr double *%ptr0, i64 8 + %ptr5 = getelementptr double *%ptr0, i64 10 + %ptr6 = getelementptr double *%ptr0, i64 12 + %ptr7 = getelementptr double *%ptr0, i64 14 + %ptr8 = getelementptr double *%ptr0, i64 16 + %ptr9 = getelementptr double *%ptr0, i64 18 + %ptr10 = getelementptr double *%ptr0, i64 20 + + %val0 = load double *%ptr0 + %val1 = load double *%ptr1 + %val2 = load double *%ptr2 + %val3 = load double *%ptr3 + %val4 = load double *%ptr4 + %val5 = load double *%ptr5 + %val6 = load double *%ptr6 + %val7 = load double *%ptr7 + %val8 = load double *%ptr8 + %val9 = load double *%ptr9 + %val10 = load double *%ptr10 + + %frob0 = fadd double %val0, %val0 + %frob1 = fadd double %val1, %val1 + %frob2 = fadd double %val2, %val2 + %frob3 = fadd double %val3, %val3 + %frob4 = fadd double %val4, %val4 + %frob5 = fadd double %val5, %val5 + %frob6 = fadd double %val6, %val6 + %frob7 = fadd double %val7, %val7 + %frob8 = fadd double %val8, %val8 + %frob9 = fadd double %val9, %val9 + %frob10 = fadd double %val9, %val10 + + store double %frob0, double *%ptr0 + store double %frob1, double *%ptr1 + store double %frob2, double *%ptr2 + store double %frob3, double *%ptr3 + store double %frob4, double *%ptr4 + store double %frob5, double *%ptr5 + store double %frob6, double *%ptr6 + store double %frob7, 
double *%ptr7 + store double %frob8, double *%ptr8 + store double %frob9, double *%ptr9 + store double %frob10, double *%ptr10 + + %ret = call double @foo() + + %accext0 = fpext double %ret to fp128 + %ext0 = fpext double %frob0 to fp128 + %mul0 = fmul fp128 %accext0, %ext0 + %const0 = fpext double 1.01 to fp128 + %extra0 = fmul fp128 %mul0, %const0 + %trunc0 = fptrunc fp128 %extra0 to double + + %accext1 = fpext double %trunc0 to fp128 + %ext1 = fpext double %frob1 to fp128 + %mul1 = fmul fp128 %accext1, %ext1 + %const1 = fpext double 1.11 to fp128 + %extra1 = fmul fp128 %mul1, %const1 + %trunc1 = fptrunc fp128 %extra1 to double + + %accext2 = fpext double %trunc1 to fp128 + %ext2 = fpext double %frob2 to fp128 + %mul2 = fmul fp128 %accext2, %ext2 + %const2 = fpext double 1.21 to fp128 + %extra2 = fmul fp128 %mul2, %const2 + %trunc2 = fptrunc fp128 %extra2 to double + + %accext3 = fpext double %trunc2 to fp128 + %ext3 = fpext double %frob3 to fp128 + %mul3 = fmul fp128 %accext3, %ext3 + %const3 = fpext double 1.31 to fp128 + %extra3 = fmul fp128 %mul3, %const3 + %trunc3 = fptrunc fp128 %extra3 to double + + %accext4 = fpext double %trunc3 to fp128 + %ext4 = fpext double %frob4 to fp128 + %mul4 = fmul fp128 %accext4, %ext4 + %const4 = fpext double 1.41 to fp128 + %extra4 = fmul fp128 %mul4, %const4 + %trunc4 = fptrunc fp128 %extra4 to double + + %accext5 = fpext double %trunc4 to fp128 + %ext5 = fpext double %frob5 to fp128 + %mul5 = fmul fp128 %accext5, %ext5 + %const5 = fpext double 1.51 to fp128 + %extra5 = fmul fp128 %mul5, %const5 + %trunc5 = fptrunc fp128 %extra5 to double + + %accext6 = fpext double %trunc5 to fp128 + %ext6 = fpext double %frob6 to fp128 + %mul6 = fmul fp128 %accext6, %ext6 + %const6 = fpext double 1.61 to fp128 + %extra6 = fmul fp128 %mul6, %const6 + %trunc6 = fptrunc fp128 %extra6 to double + + %accext7 = fpext double %trunc6 to fp128 + %ext7 = fpext double %frob7 to fp128 + %mul7 = fmul fp128 %accext7, %ext7 + %const7 = fpext double 1.71 
to fp128 + %extra7 = fmul fp128 %mul7, %const7 + %trunc7 = fptrunc fp128 %extra7 to double + + %accext8 = fpext double %trunc7 to fp128 + %ext8 = fpext double %frob8 to fp128 + %mul8 = fmul fp128 %accext8, %ext8 + %const8 = fpext double 1.81 to fp128 + %extra8 = fmul fp128 %mul8, %const8 + %trunc8 = fptrunc fp128 %extra8 to double + + %accext9 = fpext double %trunc8 to fp128 + %ext9 = fpext double %frob9 to fp128 + %mul9 = fmul fp128 %accext9, %ext9 + %const9 = fpext double 1.91 to fp128 + %extra9 = fmul fp128 %mul9, %const9 + %trunc9 = fptrunc fp128 %extra9 to double + + ret double %trunc9 +} diff --git a/test/CodeGen/SystemZ/fp-mul-05.ll b/test/CodeGen/SystemZ/fp-mul-05.ll index df5bc4e707555..0be1fe8b41a00 100644 --- a/test/CodeGen/SystemZ/fp-mul-05.ll +++ b/test/CodeGen/SystemZ/fp-mul-05.ll @@ -4,7 +4,7 @@ ; There is no memory form of 128-bit multiplication. define void @f1(fp128 *%ptr, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lxebr %f0, %f0 ; CHECK: ld %f1, 0(%r2) ; CHECK: ld %f3, 8(%r2) diff --git a/test/CodeGen/SystemZ/fp-mul-06.ll b/test/CodeGen/SystemZ/fp-mul-06.ll index 8124c680371de..3f631a68b5753 100644 --- a/test/CodeGen/SystemZ/fp-mul-06.ll +++ b/test/CodeGen/SystemZ/fp-mul-06.ll @@ -3,7 +3,7 @@ declare float @llvm.fma.f32(float %f1, float %f2, float %f3) define float @f1(float %f1, float %f2, float %acc) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: maebr %f4, %f0, %f2 ; CHECK: ler %f0, %f4 ; CHECK: br %r14 @@ -12,7 +12,7 @@ define float @f1(float %f1, float %f2, float %acc) { } define float @f2(float %f1, float *%ptr, float %acc) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: maeb %f2, %f0, 0(%r2) ; CHECK: ler %f0, %f2 ; CHECK: br %r14 @@ -22,7 +22,7 @@ define float @f2(float %f1, float *%ptr, float %acc) { } define float @f3(float %f1, float *%base, float %acc) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: maeb %f2, %f0, 4092(%r2) ; CHECK: ler %f0, %f2 ; CHECK: br %r14 @@ -36,7 +36,7 @@ define float @f4(float %f1, float *%base, float 
%acc) { ; The important thing here is that we don't generate an out-of-range ; displacement. Other sequences besides this one would be OK. ; -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: maeb %f2, %f0, 0(%r2) ; CHECK: ler %f0, %f2 @@ -51,7 +51,7 @@ define float @f5(float %f1, float *%base, float %acc) { ; Here too the important thing is that we don't generate an out-of-range ; displacement. Other sequences besides this one would be OK. ; -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: maeb %f2, %f0, 0(%r2) ; CHECK: ler %f0, %f2 @@ -63,7 +63,7 @@ define float @f5(float %f1, float *%base, float %acc) { } define float @f6(float %f1, float *%base, i64 %index, float %acc) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: maeb %f2, %f0, 0(%r1,%r2) ; CHECK: ler %f0, %f2 @@ -75,7 +75,7 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) { } define float @f7(float %f1, float *%base, i64 %index, float %acc) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sllg %r1, %r3, 2 ; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) ; CHECK: ler %f0, %f2 @@ -88,7 +88,7 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) { } define float @f8(float %f1, float *%base, i64 %index, float %acc) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sllg %r1, %r3, 2 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) ; CHECK: maeb %f2, %f0, 0(%r1) diff --git a/test/CodeGen/SystemZ/fp-mul-07.ll b/test/CodeGen/SystemZ/fp-mul-07.ll index b8e44830f3311..e4f5904472158 100644 --- a/test/CodeGen/SystemZ/fp-mul-07.ll +++ b/test/CodeGen/SystemZ/fp-mul-07.ll @@ -3,7 +3,7 @@ declare double @llvm.fma.f64(double %f1, double %f2, double %f3) define double @f1(double %f1, double %f2, double %acc) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: madbr %f4, %f0, %f2 ; CHECK: ldr %f0, %f4 ; CHECK: br %r14 @@ -12,7 +12,7 @@ define double @f1(double %f1, double %f2, double %acc) { } define double @f2(double %f1, double *%ptr, double %acc) { -; 
CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: madb %f2, %f0, 0(%r2) ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 @@ -22,7 +22,7 @@ define double @f2(double %f1, double *%ptr, double %acc) { } define double @f3(double %f1, double *%base, double %acc) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: madb %f2, %f0, 4088(%r2) ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 @@ -36,7 +36,7 @@ define double @f4(double %f1, double *%base, double %acc) { ; The important thing here is that we don't generate an out-of-range ; displacement. Other sequences besides this one would be OK. ; -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: madb %f2, %f0, 0(%r2) ; CHECK: ldr %f0, %f2 @@ -51,7 +51,7 @@ define double @f5(double %f1, double *%base, double %acc) { ; Here too the important thing is that we don't generate an out-of-range ; displacement. Other sequences besides this one would be OK. ; -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -8 ; CHECK: madb %f2, %f0, 0(%r2) ; CHECK: ldr %f0, %f2 @@ -63,7 +63,7 @@ define double @f5(double %f1, double *%base, double %acc) { } define double @f6(double %f1, double *%base, i64 %index, double %acc) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 3 ; CHECK: madb %f2, %f0, 0(%r1,%r2) ; CHECK: ldr %f0, %f2 @@ -75,7 +75,7 @@ define double @f6(double %f1, double *%base, i64 %index, double %acc) { } define double @f7(double %f1, double *%base, i64 %index, double %acc) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sllg %r1, %r3, 3 ; CHECK: madb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}}) ; CHECK: ldr %f0, %f2 @@ -88,7 +88,7 @@ define double @f7(double %f1, double *%base, i64 %index, double %acc) { } define double @f8(double %f1, double *%base, i64 %index, double %acc) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sllg %r1, %r3, 3 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) ; CHECK: madb %f2, %f0, 0(%r1) diff --git a/test/CodeGen/SystemZ/fp-mul-08.ll b/test/CodeGen/SystemZ/fp-mul-08.ll index 5c1474063a16c..ab5fcb2cbefd4 100644 --- 
a/test/CodeGen/SystemZ/fp-mul-08.ll +++ b/test/CodeGen/SystemZ/fp-mul-08.ll @@ -3,7 +3,7 @@ declare float @llvm.fma.f32(float %f1, float %f2, float %f3) define float @f1(float %f1, float %f2, float %acc) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: msebr %f4, %f0, %f2 ; CHECK: ler %f0, %f4 ; CHECK: br %r14 @@ -13,7 +13,7 @@ define float @f1(float %f1, float %f2, float %acc) { } define float @f2(float %f1, float *%ptr, float %acc) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mseb %f2, %f0, 0(%r2) ; CHECK: ler %f0, %f2 ; CHECK: br %r14 @@ -24,7 +24,7 @@ define float @f2(float %f1, float *%ptr, float %acc) { } define float @f3(float %f1, float *%base, float %acc) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: mseb %f2, %f0, 4092(%r2) ; CHECK: ler %f0, %f2 ; CHECK: br %r14 @@ -39,7 +39,7 @@ define float @f4(float %f1, float *%base, float %acc) { ; The important thing here is that we don't generate an out-of-range ; displacement. Other sequences besides this one would be OK. ; -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: mseb %f2, %f0, 0(%r2) ; CHECK: ler %f0, %f2 @@ -55,7 +55,7 @@ define float @f5(float %f1, float *%base, float %acc) { ; Here too the important thing is that we don't generate an out-of-range ; displacement. Other sequences besides this one would be OK. 
; -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: mseb %f2, %f0, 0(%r2) ; CHECK: ler %f0, %f2 @@ -68,7 +68,7 @@ define float @f5(float %f1, float *%base, float %acc) { } define float @f6(float %f1, float *%base, i64 %index, float %acc) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: mseb %f2, %f0, 0(%r1,%r2) ; CHECK: ler %f0, %f2 @@ -81,7 +81,7 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) { } define float @f7(float %f1, float *%base, i64 %index, float %acc) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sllg %r1, %r3, 2 ; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) ; CHECK: ler %f0, %f2 @@ -95,7 +95,7 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) { } define float @f8(float %f1, float *%base, i64 %index, float %acc) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sllg %r1, %r3, 2 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) ; CHECK: mseb %f2, %f0, 0(%r1) diff --git a/test/CodeGen/SystemZ/fp-mul-09.ll b/test/CodeGen/SystemZ/fp-mul-09.ll index bcae1e35e6eb6..7e740968a8c78 100644 --- a/test/CodeGen/SystemZ/fp-mul-09.ll +++ b/test/CodeGen/SystemZ/fp-mul-09.ll @@ -3,7 +3,7 @@ declare double @llvm.fma.f64(double %f1, double %f2, double %f3) define double @f1(double %f1, double %f2, double %acc) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: msdbr %f4, %f0, %f2 ; CHECK: ldr %f0, %f4 ; CHECK: br %r14 @@ -13,7 +13,7 @@ define double @f1(double %f1, double %f2, double %acc) { } define double @f2(double %f1, double *%ptr, double %acc) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: msdb %f2, %f0, 0(%r2) ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 @@ -24,7 +24,7 @@ define double @f2(double %f1, double *%ptr, double %acc) { } define double @f3(double %f1, double *%base, double %acc) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: msdb %f2, %f0, 4088(%r2) ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 @@ -39,7 +39,7 @@ define double @f4(double %f1, double *%base, double %acc) { ; The important thing here is that we 
don't generate an out-of-range ; displacement. Other sequences besides this one would be OK. ; -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: msdb %f2, %f0, 0(%r2) ; CHECK: ldr %f0, %f2 @@ -55,7 +55,7 @@ define double @f5(double %f1, double *%base, double %acc) { ; Here too the important thing is that we don't generate an out-of-range ; displacement. Other sequences besides this one would be OK. ; -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -8 ; CHECK: msdb %f2, %f0, 0(%r2) ; CHECK: ldr %f0, %f2 @@ -68,7 +68,7 @@ define double @f5(double %f1, double *%base, double %acc) { } define double @f6(double %f1, double *%base, i64 %index, double %acc) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 3 ; CHECK: msdb %f2, %f0, 0(%r1,%r2) ; CHECK: ldr %f0, %f2 @@ -81,7 +81,7 @@ define double @f6(double %f1, double *%base, i64 %index, double %acc) { } define double @f7(double %f1, double *%base, i64 %index, double %acc) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sllg %r1, %r3, 3 ; CHECK: msdb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}}) ; CHECK: ldr %f0, %f2 @@ -95,7 +95,7 @@ define double @f7(double %f1, double *%base, i64 %index, double %acc) { } define double @f8(double %f1, double *%base, i64 %index, double %acc) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sllg %r1, %r3, 3 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) ; CHECK: msdb %f2, %f0, 0(%r1) diff --git a/test/CodeGen/SystemZ/fp-neg-01.ll b/test/CodeGen/SystemZ/fp-neg-01.ll index 09a4a53e41d1c..1cc6d816fee3f 100644 --- a/test/CodeGen/SystemZ/fp-neg-01.ll +++ b/test/CodeGen/SystemZ/fp-neg-01.ll @@ -4,7 +4,7 @@ ; Test f32. define float @f1(float %f) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lcebr %f0, %f0 ; CHECK: br %r14 %res = fsub float -0.0, %f @@ -13,7 +13,7 @@ define float @f1(float %f) { ; Test f64. 
define double @f2(double %f) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lcdbr %f0, %f0 ; CHECK: br %r14 %res = fsub double -0.0, %f @@ -24,7 +24,7 @@ define double @f2(double %f) { ; be better implemented using an XI on the upper byte. Do some extra ; processing so that using FPRs is unequivocally better. define void @f3(fp128 *%ptr, fp128 *%ptr2) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lcxbr ; CHECK: dxbr ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/fp-round-01.ll b/test/CodeGen/SystemZ/fp-round-01.ll index 20325c3366486..565db5ad4f513 100644 --- a/test/CodeGen/SystemZ/fp-round-01.ll +++ b/test/CodeGen/SystemZ/fp-round-01.ll @@ -1,32 +1,31 @@ -; Test rint()-like rounding, with non-integer values triggering an -; inexact condition. +; Test rounding functions for z10. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -; Test f32. +; Test rint for f32. declare float @llvm.rint.f32(float %f) define float @f1(float %f) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: fiebr %f0, 0, %f0 ; CHECK: br %r14 %res = call float @llvm.rint.f32(float %f) ret float %res } -; Test f64. +; Test rint for f64. declare double @llvm.rint.f64(double %f) define double @f2(double %f) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: fidbr %f0, 0, %f0 ; CHECK: br %r14 %res = call double @llvm.rint.f64(double %f) ret double %res } -; Test f128. +; Test rint for f128. declare fp128 @llvm.rint.f128(fp128 %f) define void @f3(fp128 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: fixbr %f0, 0, %f0 ; CHECK: br %r14 %src = load fp128 *%ptr @@ -34,3 +33,118 @@ define void @f3(fp128 *%ptr) { store fp128 %res, fp128 *%ptr ret void } + +; Test nearbyint for f32. +declare float @llvm.nearbyint.f32(float %f) +define float @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: brasl %r14, nearbyintf@PLT +; CHECK: br %r14 + %res = call float @llvm.nearbyint.f32(float %f) + ret float %res +} + +; Test nearbyint for f64. 
+declare double @llvm.nearbyint.f64(double %f) +define double @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: brasl %r14, nearbyint@PLT +; CHECK: br %r14 + %res = call double @llvm.nearbyint.f64(double %f) + ret double %res +} + +; Test nearbyint for f128: omitted for now because we cannot handle +; indirect arguments. + +; Test floor for f32. +declare float @llvm.floor.f32(float %f) +define float @f7(float %f) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, floorf@PLT +; CHECK: br %r14 + %res = call float @llvm.floor.f32(float %f) + ret float %res +} + +; Test floor for f64. +declare double @llvm.floor.f64(double %f) +define double @f8(double %f) { +; CHECK-LABEL: f8: +; CHECK: brasl %r14, floor@PLT +; CHECK: br %r14 + %res = call double @llvm.floor.f64(double %f) + ret double %res +} + +; Test floor for f128: omitted for now because we cannot handle +; indirect arguments. + +; Test ceil for f32. +declare float @llvm.ceil.f32(float %f) +define float @f10(float %f) { +; CHECK-LABEL: f10: +; CHECK: brasl %r14, ceilf@PLT +; CHECK: br %r14 + %res = call float @llvm.ceil.f32(float %f) + ret float %res +} + +; Test ceil for f64. +declare double @llvm.ceil.f64(double %f) +define double @f11(double %f) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, ceil@PLT +; CHECK: br %r14 + %res = call double @llvm.ceil.f64(double %f) + ret double %res +} + +; Test ceil for f128: omitted for now because we cannot handle +; indirect arguments. + +; Test trunc for f32. +declare float @llvm.trunc.f32(float %f) +define float @f13(float %f) { +; CHECK-LABEL: f13: +; CHECK: brasl %r14, truncf@PLT +; CHECK: br %r14 + %res = call float @llvm.trunc.f32(float %f) + ret float %res +} + +; Test trunc for f64. 
+declare double @llvm.trunc.f64(double %f) +define double @f14(double %f) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, trunc@PLT +; CHECK: br %r14 + %res = call double @llvm.trunc.f64(double %f) + ret double %res +} + +; Test trunc for f128: omitted for now because we cannot handle +; indirect arguments. + +; Test round for f32. +declare float @llvm.round.f32(float %f) +define float @f16(float %f) { +; CHECK-LABEL: f16: +; CHECK: brasl %r14, roundf@PLT +; CHECK: br %r14 + %res = call float @llvm.round.f32(float %f) + ret float %res +} + +; Test round for f64. +declare double @llvm.round.f64(double %f) +define double @f17(double %f) { +; CHECK-LABEL: f17: +; CHECK: brasl %r14, round@PLT +; CHECK: br %r14 + %res = call double @llvm.round.f64(double %f) + ret double %res +} + +; Test round for f128: omitted for now because we cannot handle +; indirect arguments. diff --git a/test/CodeGen/SystemZ/fp-round-02.ll b/test/CodeGen/SystemZ/fp-round-02.ll new file mode 100644 index 0000000000000..d79c9c47050ae --- /dev/null +++ b/test/CodeGen/SystemZ/fp-round-02.ll @@ -0,0 +1,195 @@ +; Test rounding functions for z196 and above. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test rint for f32. +declare float @llvm.rint.f32(float %f) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: fiebr %f0, 0, %f0 +; CHECK: br %r14 + %res = call float @llvm.rint.f32(float %f) + ret float %res +} + +; Test rint for f64. +declare double @llvm.rint.f64(double %f) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: fidbr %f0, 0, %f0 +; CHECK: br %r14 + %res = call double @llvm.rint.f64(double %f) + ret double %res +} + +; Test rint for f128. +declare fp128 @llvm.rint.f128(fp128 %f) +define void @f3(fp128 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: fixbr %f0, 0, %f0 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.rint.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test nearbyint for f32. 
+declare float @llvm.nearbyint.f32(float %f) +define float @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: fiebra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.nearbyint.f32(float %f) + ret float %res +} + +; Test nearbyint for f64. +declare double @llvm.nearbyint.f64(double %f) +define double @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: fidbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.nearbyint.f64(double %f) + ret double %res +} + +; Test nearbyint for f128. +declare fp128 @llvm.nearbyint.f128(fp128 %f) +define void @f6(fp128 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: fixbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.nearbyint.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test floor for f32. +declare float @llvm.floor.f32(float %f) +define float @f7(float %f) { +; CHECK-LABEL: f7: +; CHECK: fiebra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.floor.f32(float %f) + ret float %res +} + +; Test floor for f64. +declare double @llvm.floor.f64(double %f) +define double @f8(double %f) { +; CHECK-LABEL: f8: +; CHECK: fidbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.floor.f64(double %f) + ret double %res +} + +; Test floor for f128. +declare fp128 @llvm.floor.f128(fp128 %f) +define void @f9(fp128 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: fixbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.floor.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test ceil for f32. +declare float @llvm.ceil.f32(float %f) +define float @f10(float %f) { +; CHECK-LABEL: f10: +; CHECK: fiebra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.ceil.f32(float %f) + ret float %res +} + +; Test ceil for f64. 
+declare double @llvm.ceil.f64(double %f) +define double @f11(double %f) { +; CHECK-LABEL: f11: +; CHECK: fidbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.ceil.f64(double %f) + ret double %res +} + +; Test ceil for f128. +declare fp128 @llvm.ceil.f128(fp128 %f) +define void @f12(fp128 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: fixbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.ceil.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test trunc for f32. +declare float @llvm.trunc.f32(float %f) +define float @f13(float %f) { +; CHECK-LABEL: f13: +; CHECK: fiebra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.trunc.f32(float %f) + ret float %res +} + +; Test trunc for f64. +declare double @llvm.trunc.f64(double %f) +define double @f14(double %f) { +; CHECK-LABEL: f14: +; CHECK: fidbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.trunc.f64(double %f) + ret double %res +} + +; Test trunc for f128. +declare fp128 @llvm.trunc.f128(fp128 %f) +define void @f15(fp128 *%ptr) { +; CHECK-LABEL: f15: +; CHECK: fixbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.trunc.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test round for f32. +declare float @llvm.round.f32(float %f) +define float @f16(float %f) { +; CHECK-LABEL: f16: +; CHECK: fiebra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.round.f32(float %f) + ret float %res +} + +; Test round for f64. +declare double @llvm.round.f64(double %f) +define double @f17(double %f) { +; CHECK-LABEL: f17: +; CHECK: fidbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.round.f64(double %f) + ret double %res +} + +; Test round for f128. 
+declare fp128 @llvm.round.f128(fp128 %f) +define void @f18(fp128 *%ptr) { +; CHECK-LABEL: f18: +; CHECK: fixbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.round.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-01.ll b/test/CodeGen/SystemZ/fp-sqrt-01.ll index 7ed27f56d0d05..7465af456b837 100644 --- a/test/CodeGen/SystemZ/fp-sqrt-01.ll +++ b/test/CodeGen/SystemZ/fp-sqrt-01.ll @@ -2,11 +2,12 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -declare float @llvm.sqrt.f32(float %f) +declare float @llvm.sqrt.f32(float) +declare float @sqrtf(float) ; Check register square root. define float @f1(float %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sqebr %f0, %f0 ; CHECK: br %r14 %res = call float @llvm.sqrt.f32(float %val) @@ -15,7 +16,7 @@ define float @f1(float %val) { ; Check the low end of the SQEB range. define float @f2(float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sqeb %f0, 0(%r2) ; CHECK: br %r14 %val = load float *%ptr @@ -25,7 +26,7 @@ define float @f2(float *%ptr) { ; Check the high end of the aligned SQEB range. define float @f3(float *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: sqeb %f0, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1023 @@ -37,7 +38,7 @@ define float @f3(float *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define float @f4(float *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: sqeb %f0, 0(%r2) ; CHECK: br %r14 @@ -49,7 +50,7 @@ define float @f4(float *%base) { ; Check negative displacements, which also need separate address logic. define float @f5(float *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: sqeb %f0, 0(%r2) ; CHECK: br %r14 @@ -61,7 +62,7 @@ define float @f5(float *%base) { ; Check that SQEB allows indices. 
define float @f6(float *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: sqeb %f0, 400(%r1,%r2) ; CHECK: br %r14 @@ -71,3 +72,98 @@ define float @f6(float *%base, i64 %index) { %res = call float @llvm.sqrt.f32(float %val) ret float %res } + +; Test a case where we spill the source of at least one SQEBR. We want +; to use SQEB if possible. +define void @f7(float *%ptr) { +; CHECK-LABEL: f7: +; CHECK: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile float *%ptr + %val1 = load volatile float *%ptr + %val2 = load volatile float *%ptr + %val3 = load volatile float *%ptr + %val4 = load volatile float *%ptr + %val5 = load volatile float *%ptr + %val6 = load volatile float *%ptr + %val7 = load volatile float *%ptr + %val8 = load volatile float *%ptr + %val9 = load volatile float *%ptr + %val10 = load volatile float *%ptr + %val11 = load volatile float *%ptr + %val12 = load volatile float *%ptr + %val13 = load volatile float *%ptr + %val14 = load volatile float *%ptr + %val15 = load volatile float *%ptr + %val16 = load volatile float *%ptr + + %sqrt0 = call float @llvm.sqrt.f32(float %val0) + %sqrt1 = call float @llvm.sqrt.f32(float %val1) + %sqrt2 = call float @llvm.sqrt.f32(float %val2) + %sqrt3 = call float @llvm.sqrt.f32(float %val3) + %sqrt4 = call float @llvm.sqrt.f32(float %val4) + %sqrt5 = call float @llvm.sqrt.f32(float %val5) + %sqrt6 = call float @llvm.sqrt.f32(float %val6) + %sqrt7 = call float @llvm.sqrt.f32(float %val7) + %sqrt8 = call float @llvm.sqrt.f32(float %val8) + %sqrt9 = call float @llvm.sqrt.f32(float %val9) + %sqrt10 = call float @llvm.sqrt.f32(float %val10) + %sqrt11 = call float @llvm.sqrt.f32(float %val11) + %sqrt12 = call float @llvm.sqrt.f32(float %val12) + %sqrt13 = call float @llvm.sqrt.f32(float %val13) + %sqrt14 = call float @llvm.sqrt.f32(float %val14) + %sqrt15 = call float @llvm.sqrt.f32(float %val15) + %sqrt16 = call float @llvm.sqrt.f32(float %val16) + + store 
volatile float %val0, float *%ptr + store volatile float %val1, float *%ptr + store volatile float %val2, float *%ptr + store volatile float %val3, float *%ptr + store volatile float %val4, float *%ptr + store volatile float %val5, float *%ptr + store volatile float %val6, float *%ptr + store volatile float %val7, float *%ptr + store volatile float %val8, float *%ptr + store volatile float %val9, float *%ptr + store volatile float %val10, float *%ptr + store volatile float %val11, float *%ptr + store volatile float %val12, float *%ptr + store volatile float %val13, float *%ptr + store volatile float %val14, float *%ptr + store volatile float %val15, float *%ptr + store volatile float %val16, float *%ptr + + store volatile float %sqrt0, float *%ptr + store volatile float %sqrt1, float *%ptr + store volatile float %sqrt2, float *%ptr + store volatile float %sqrt3, float *%ptr + store volatile float %sqrt4, float *%ptr + store volatile float %sqrt5, float *%ptr + store volatile float %sqrt6, float *%ptr + store volatile float %sqrt7, float *%ptr + store volatile float %sqrt8, float *%ptr + store volatile float %sqrt9, float *%ptr + store volatile float %sqrt10, float *%ptr + store volatile float %sqrt11, float *%ptr + store volatile float %sqrt12, float *%ptr + store volatile float %sqrt13, float *%ptr + store volatile float %sqrt14, float *%ptr + store volatile float %sqrt15, float *%ptr + store volatile float %sqrt16, float *%ptr + + ret void +} + +; Check that a call to the normal sqrtf function is lowered. 
+define float @f8(float %dummy, float %val) { +; CHECK-LABEL: f8: +; CHECK: sqebr %f0, %f2 +; CHECK: cebr %f0, %f0 +; CHECK: jo [[LABEL:\.L.*]] +; CHECK: br %r14 +; CHECK: [[LABEL]]: +; CHECK: ler %f0, %f2 +; CHECK: jg sqrtf@PLT + %res = tail call float @sqrtf(float %val) + ret float %res +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-02.ll b/test/CodeGen/SystemZ/fp-sqrt-02.ll index 22a91ad2f4f79..66ffd19d6c319 100644 --- a/test/CodeGen/SystemZ/fp-sqrt-02.ll +++ b/test/CodeGen/SystemZ/fp-sqrt-02.ll @@ -3,10 +3,11 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s declare double @llvm.sqrt.f64(double %f) +declare double @sqrt(double) ; Check register square root. define double @f1(double %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sqdbr %f0, %f0 ; CHECK: br %r14 %res = call double @llvm.sqrt.f64(double %val) @@ -15,7 +16,7 @@ define double @f1(double %val) { ; Check the low end of the SQDB range. define double @f2(double *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sqdb %f0, 0(%r2) ; CHECK: br %r14 %val = load double *%ptr @@ -25,7 +26,7 @@ define double @f2(double *%ptr) { ; Check the high end of the aligned SQDB range. define double @f3(double *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: sqdb %f0, 4088(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 511 @@ -37,7 +38,7 @@ define double @f3(double *%base) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f4(double *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: sqdb %f0, 0(%r2) ; CHECK: br %r14 @@ -49,7 +50,7 @@ define double @f4(double *%base) { ; Check negative displacements, which also need separate address logic. define double @f5(double *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -8 ; CHECK: sqdb %f0, 0(%r2) ; CHECK: br %r14 @@ -61,7 +62,7 @@ define double @f5(double *%base) { ; Check that SQDB allows indices. 
define double @f6(double *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 3 ; CHECK: sqdb %f0, 800(%r1,%r2) ; CHECK: br %r14 @@ -71,3 +72,98 @@ define double @f6(double *%base, i64 %index) { %res = call double @llvm.sqrt.f64(double %val) ret double %res } + +; Test a case where we spill the source of at least one SQDBR. We want +; to use SQDB if possible. +define void @f7(double *%ptr) { +; CHECK-LABEL: f7: +; CHECK: sqdb {{%f[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %val0 = load volatile double *%ptr + %val1 = load volatile double *%ptr + %val2 = load volatile double *%ptr + %val3 = load volatile double *%ptr + %val4 = load volatile double *%ptr + %val5 = load volatile double *%ptr + %val6 = load volatile double *%ptr + %val7 = load volatile double *%ptr + %val8 = load volatile double *%ptr + %val9 = load volatile double *%ptr + %val10 = load volatile double *%ptr + %val11 = load volatile double *%ptr + %val12 = load volatile double *%ptr + %val13 = load volatile double *%ptr + %val14 = load volatile double *%ptr + %val15 = load volatile double *%ptr + %val16 = load volatile double *%ptr + + %sqrt0 = call double @llvm.sqrt.f64(double %val0) + %sqrt1 = call double @llvm.sqrt.f64(double %val1) + %sqrt2 = call double @llvm.sqrt.f64(double %val2) + %sqrt3 = call double @llvm.sqrt.f64(double %val3) + %sqrt4 = call double @llvm.sqrt.f64(double %val4) + %sqrt5 = call double @llvm.sqrt.f64(double %val5) + %sqrt6 = call double @llvm.sqrt.f64(double %val6) + %sqrt7 = call double @llvm.sqrt.f64(double %val7) + %sqrt8 = call double @llvm.sqrt.f64(double %val8) + %sqrt9 = call double @llvm.sqrt.f64(double %val9) + %sqrt10 = call double @llvm.sqrt.f64(double %val10) + %sqrt11 = call double @llvm.sqrt.f64(double %val11) + %sqrt12 = call double @llvm.sqrt.f64(double %val12) + %sqrt13 = call double @llvm.sqrt.f64(double %val13) + %sqrt14 = call double @llvm.sqrt.f64(double %val14) + %sqrt15 = call double @llvm.sqrt.f64(double %val15) + %sqrt16 = call 
double @llvm.sqrt.f64(double %val16) + + store volatile double %val0, double *%ptr + store volatile double %val1, double *%ptr + store volatile double %val2, double *%ptr + store volatile double %val3, double *%ptr + store volatile double %val4, double *%ptr + store volatile double %val5, double *%ptr + store volatile double %val6, double *%ptr + store volatile double %val7, double *%ptr + store volatile double %val8, double *%ptr + store volatile double %val9, double *%ptr + store volatile double %val10, double *%ptr + store volatile double %val11, double *%ptr + store volatile double %val12, double *%ptr + store volatile double %val13, double *%ptr + store volatile double %val14, double *%ptr + store volatile double %val15, double *%ptr + store volatile double %val16, double *%ptr + + store volatile double %sqrt0, double *%ptr + store volatile double %sqrt1, double *%ptr + store volatile double %sqrt2, double *%ptr + store volatile double %sqrt3, double *%ptr + store volatile double %sqrt4, double *%ptr + store volatile double %sqrt5, double *%ptr + store volatile double %sqrt6, double *%ptr + store volatile double %sqrt7, double *%ptr + store volatile double %sqrt8, double *%ptr + store volatile double %sqrt9, double *%ptr + store volatile double %sqrt10, double *%ptr + store volatile double %sqrt11, double *%ptr + store volatile double %sqrt12, double *%ptr + store volatile double %sqrt13, double *%ptr + store volatile double %sqrt14, double *%ptr + store volatile double %sqrt15, double *%ptr + store volatile double %sqrt16, double *%ptr + + ret void +} + +; Check that a call to the normal sqrt function is lowered. 
+define double @f8(double %dummy, double %val) { +; CHECK-LABEL: f8: +; CHECK: sqdbr %f0, %f2 +; CHECK: cdbr %f0, %f0 +; CHECK: jo [[LABEL:\.L.*]] +; CHECK: br %r14 +; CHECK: [[LABEL]]: +; CHECK: ldr %f0, %f2 +; CHECK: jg sqrt@PLT + %res = tail call double @sqrt(double %val) + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-03.ll b/test/CodeGen/SystemZ/fp-sqrt-03.ll index 1b49af41254f7..71426440aca3b 100644 --- a/test/CodeGen/SystemZ/fp-sqrt-03.ll +++ b/test/CodeGen/SystemZ/fp-sqrt-03.ll @@ -6,7 +6,7 @@ declare fp128 @llvm.sqrt.f128(fp128 %f) ; There's no memory form of SQXBR. define void @f1(fp128 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ld %f0, 0(%r2) ; CHECK: ld %f2, 8(%r2) ; CHECK: sqxbr %f0, %f0 diff --git a/test/CodeGen/SystemZ/fp-sub-01.ll b/test/CodeGen/SystemZ/fp-sub-01.ll index b03f04bd017ef..76f46f6267053 100644 --- a/test/CodeGen/SystemZ/fp-sub-01.ll +++ b/test/CodeGen/SystemZ/fp-sub-01.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare float @foo() + ; Check register subtraction. define float @f1(float %f1, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sebr %f0, %f2 ; CHECK: br %r14 %res = fsub float %f1, %f2 @@ -13,7 +15,7 @@ define float @f1(float %f1, float %f2) { ; Check the low end of the SEB range. define float @f2(float %f1, float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: seb %f0, 0(%r2) ; CHECK: br %r14 %f2 = load float *%ptr @@ -23,7 +25,7 @@ define float @f2(float %f1, float *%ptr) { ; Check the high end of the aligned SEB range. define float @f3(float %f1, float *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: seb %f0, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1023 @@ -35,7 +37,7 @@ define float @f3(float %f1, float *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. 
define float @f4(float %f1, float *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: seb %f0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +49,7 @@ define float @f4(float %f1, float *%base) { ; Check negative displacements, which also need separate address logic. define float @f5(float %f1, float *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: seb %f0, 0(%r2) ; CHECK: br %r14 @@ -59,7 +61,7 @@ define float @f5(float %f1, float *%base) { ; Check that SEB allows indices. define float @f6(float %f1, float *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: seb %f0, 400(%r1,%r2) ; CHECK: br %r14 @@ -69,3 +71,49 @@ define float @f6(float %f1, float *%base, i64 %index) { %res = fsub float %f1, %f2 ret float %res } + +; Check that subtractions of spilled values can use SEB rather than SEBR. +define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: seb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float *%ptr0, i64 2 + %ptr2 = getelementptr float *%ptr0, i64 4 + %ptr3 = getelementptr float *%ptr0, i64 6 + %ptr4 = getelementptr float *%ptr0, i64 8 + %ptr5 = getelementptr float *%ptr0, i64 10 + %ptr6 = getelementptr float *%ptr0, i64 12 + %ptr7 = getelementptr float *%ptr0, i64 14 + %ptr8 = getelementptr float *%ptr0, i64 16 + %ptr9 = getelementptr float *%ptr0, i64 18 + %ptr10 = getelementptr float *%ptr0, i64 20 + + %val0 = load float *%ptr0 + %val1 = load float *%ptr1 + %val2 = load float *%ptr2 + %val3 = load float *%ptr3 + %val4 = load float *%ptr4 + %val5 = load float *%ptr5 + %val6 = load float *%ptr6 + %val7 = load float *%ptr7 + %val8 = load float *%ptr8 + %val9 = load float *%ptr9 + %val10 = load float *%ptr10 + + %ret = call float @foo() + + %sub0 = fsub float %ret, %val0 + %sub1 = fsub float %sub0, %val1 + %sub2 = fsub float %sub1, %val2 + %sub3 = fsub float %sub2, %val3 + %sub4 = fsub float %sub3, %val4 + %sub5 = fsub float %sub4, 
%val5 + %sub6 = fsub float %sub5, %val6 + %sub7 = fsub float %sub6, %val7 + %sub8 = fsub float %sub7, %val8 + %sub9 = fsub float %sub8, %val9 + %sub10 = fsub float %sub9, %val10 + + ret float %sub10 +} diff --git a/test/CodeGen/SystemZ/fp-sub-02.ll b/test/CodeGen/SystemZ/fp-sub-02.ll index bf9848c2fd511..99cafed8d08b6 100644 --- a/test/CodeGen/SystemZ/fp-sub-02.ll +++ b/test/CodeGen/SystemZ/fp-sub-02.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare double @foo() + ; Check register subtraction. define double @f1(double %f1, double %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sdbr %f0, %f2 ; CHECK: br %r14 %res = fsub double %f1, %f2 @@ -13,7 +15,7 @@ define double @f1(double %f1, double %f2) { ; Check the low end of the SDB range. define double @f2(double %f1, double *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sdb %f0, 0(%r2) ; CHECK: br %r14 %f2 = load double *%ptr @@ -23,7 +25,7 @@ define double @f2(double %f1, double *%ptr) { ; Check the high end of the aligned SDB range. define double @f3(double %f1, double *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: sdb %f0, 4088(%r2) ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 511 @@ -35,7 +37,7 @@ define double @f3(double %f1, double *%base) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f4(double %f1, double *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: sdb %f0, 0(%r2) ; CHECK: br %r14 @@ -47,7 +49,7 @@ define double @f4(double %f1, double *%base) { ; Check negative displacements, which also need separate address logic. define double @f5(double %f1, double *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -8 ; CHECK: sdb %f0, 0(%r2) ; CHECK: br %r14 @@ -59,7 +61,7 @@ define double @f5(double %f1, double *%base) { ; Check that SDB allows indices. 
define double @f6(double %f1, double *%base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 3 ; CHECK: sdb %f0, 800(%r1,%r2) ; CHECK: br %r14 @@ -69,3 +71,49 @@ define double @f6(double %f1, double *%base, i64 %index) { %res = fsub double %f1, %f2 ret double %res } + +; Check that subtractions of spilled values can use SDB rather than SDBR. +define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: sdb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 + %ptr3 = getelementptr double *%ptr0, i64 6 + %ptr4 = getelementptr double *%ptr0, i64 8 + %ptr5 = getelementptr double *%ptr0, i64 10 + %ptr6 = getelementptr double *%ptr0, i64 12 + %ptr7 = getelementptr double *%ptr0, i64 14 + %ptr8 = getelementptr double *%ptr0, i64 16 + %ptr9 = getelementptr double *%ptr0, i64 18 + %ptr10 = getelementptr double *%ptr0, i64 20 + + %val0 = load double *%ptr0 + %val1 = load double *%ptr1 + %val2 = load double *%ptr2 + %val3 = load double *%ptr3 + %val4 = load double *%ptr4 + %val5 = load double *%ptr5 + %val6 = load double *%ptr6 + %val7 = load double *%ptr7 + %val8 = load double *%ptr8 + %val9 = load double *%ptr9 + %val10 = load double *%ptr10 + + %ret = call double @foo() + + %sub0 = fsub double %ret, %val0 + %sub1 = fsub double %sub0, %val1 + %sub2 = fsub double %sub1, %val2 + %sub3 = fsub double %sub2, %val3 + %sub4 = fsub double %sub3, %val4 + %sub5 = fsub double %sub4, %val5 + %sub6 = fsub double %sub5, %val6 + %sub7 = fsub double %sub6, %val7 + %sub8 = fsub double %sub7, %val8 + %sub9 = fsub double %sub8, %val9 + %sub10 = fsub double %sub9, %val10 + + ret double %sub10 +} diff --git a/test/CodeGen/SystemZ/fp-sub-03.ll b/test/CodeGen/SystemZ/fp-sub-03.ll index 82bb94dd28b38..a1404c4ff0e76 100644 --- a/test/CodeGen/SystemZ/fp-sub-03.ll +++ b/test/CodeGen/SystemZ/fp-sub-03.ll @@ -4,7 +4,7 @@ ; There is no memory form of 128-bit 
subtraction. define void @f1(fp128 *%ptr, float %f2) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lxebr %f0, %f0 ; CHECK: ld %f1, 0(%r2) ; CHECK: ld %f3, 8(%r2) diff --git a/test/CodeGen/SystemZ/frame-01.ll b/test/CodeGen/SystemZ/frame-01.ll index 0d343128c4cd8..f61836ca8552b 100644 --- a/test/CodeGen/SystemZ/frame-01.ll +++ b/test/CodeGen/SystemZ/frame-01.ll @@ -3,9 +3,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare void @foo(i32 *) + ; The CFA offset is 160 (the caller-allocated part of the frame) + 168. define void @f1(i64 %x) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: aghi %r15, -168 ; CHECK: .cfi_def_cfa_offset 328 ; CHECK: stg %r2, 160(%r15) @@ -18,18 +20,18 @@ define void @f1(i64 %x) { ; Check frames of size 32760, which is the largest size that can be both ; allocated and freed using AGHI. This size is big enough to require -; an emergency spill slot at 160(%r15), for instructions with unsigned +; two emergency spill slots at 160(%r15), for instructions with unsigned ; 12-bit offsets that end up being out of range. Fill the remaining -; 32760 - 168 bytes by allocating (32760 - 168) / 8 = 4074 doublewords. +; 32760 - 176 bytes by allocating (32760 - 176) / 8 = 4073 doublewords. define void @f2(i64 %x) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: aghi %r15, -32760 ; CHECK: .cfi_def_cfa_offset 32920 -; CHECK: stg %r2, 168(%r15) +; CHECK: stg %r2, 176(%r15) ; CHECK: aghi %r15, 32760 ; CHECK: br %r14 - %y = alloca [4074 x i64], align 8 - %ptr = getelementptr inbounds [4074 x i64]* %y, i64 0, i64 0 + %y = alloca [4073 x i64], align 8 + %ptr = getelementptr inbounds [4073 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %ptr ret void } @@ -37,14 +39,14 @@ define void @f2(i64 %x) { ; Allocate one more doubleword. This is the one frame size that we can ; allocate using AGHI but must free using AGFI. 
define void @f3(i64 %x) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: aghi %r15, -32768 ; CHECK: .cfi_def_cfa_offset 32928 -; CHECK: stg %r2, 168(%r15) +; CHECK: stg %r2, 176(%r15) ; CHECK: agfi %r15, 32768 ; CHECK: br %r14 - %y = alloca [4075 x i64], align 8 - %ptr = getelementptr inbounds [4075 x i64]* %y, i64 0, i64 0 + %y = alloca [4074 x i64], align 8 + %ptr = getelementptr inbounds [4074 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %ptr ret void } @@ -52,14 +54,14 @@ define void @f3(i64 %x) { ; Allocate another doubleword on top of that. The allocation and free ; must both use AGFI. define void @f4(i64 %x) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r15, -32776 ; CHECK: .cfi_def_cfa_offset 32936 -; CHECK: stg %r2, 168(%r15) +; CHECK: stg %r2, 176(%r15) ; CHECK: agfi %r15, 32776 ; CHECK: br %r14 - %y = alloca [4076 x i64], align 8 - %ptr = getelementptr inbounds [4076 x i64]* %y, i64 0, i64 0 + %y = alloca [4075 x i64], align 8 + %ptr = getelementptr inbounds [4075 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %ptr ret void } @@ -67,13 +69,13 @@ define void @f4(i64 %x) { ; The largest size that can be both allocated and freed using AGFI. ; At this point the frame is too big to represent properly in the CFI. define void @f5(i64 %x) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r15, -2147483640 -; CHECK: stg %r2, 168(%r15) +; CHECK: stg %r2, 176(%r15) ; CHECK: agfi %r15, 2147483640 ; CHECK: br %r14 - %y = alloca [268435434 x i64], align 8 - %ptr = getelementptr inbounds [268435434 x i64]* %y, i64 0, i64 0 + %y = alloca [268435433 x i64], align 8 + %ptr = getelementptr inbounds [268435433 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %ptr ret void } @@ -81,14 +83,14 @@ define void @f5(i64 %x) { ; The only frame size that can be allocated using a single AGFI but which ; must be freed using two instructions. 
define void @f6(i64 %x) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r15, -2147483648 -; CHECK: stg %r2, 168(%r15) +; CHECK: stg %r2, 176(%r15) ; CHECK: agfi %r15, 2147483640 ; CHECK: aghi %r15, 8 ; CHECK: br %r14 - %y = alloca [268435435 x i64], align 8 - %ptr = getelementptr inbounds [268435435 x i64]* %y, i64 0, i64 0 + %y = alloca [268435434 x i64], align 8 + %ptr = getelementptr inbounds [268435434 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %ptr ret void } @@ -96,15 +98,29 @@ define void @f6(i64 %x) { ; The smallest frame size that needs two instructions to both allocate ; and free the frame. define void @f7(i64 %x) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r15, -2147483648 ; CHECK: aghi %r15, -8 -; CHECK: stg %r2, 168(%r15) +; CHECK: stg %r2, 176(%r15) ; CHECK: agfi %r15, 2147483640 ; CHECK: aghi %r15, 16 ; CHECK: br %r14 - %y = alloca [268435436 x i64], align 8 - %ptr = getelementptr inbounds [268435436 x i64]* %y, i64 0, i64 0 + %y = alloca [268435435 x i64], align 8 + %ptr = getelementptr inbounds [268435435 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %ptr ret void } + +; Make sure that LA can be rematerialized. +define void @f8() { +; CHECK-LABEL: f8: +; CHECK: la %r2, 164(%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: la %r2, 164(%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i32 + call void @foo(i32 *%ptr) + call void @foo(i32 *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/frame-02.ll b/test/CodeGen/SystemZ/frame-02.ll index 589703ec0e74e..9a7f8eac9ebad 100644 --- a/test/CodeGen/SystemZ/frame-02.ll +++ b/test/CodeGen/SystemZ/frame-02.ll @@ -7,7 +7,7 @@ ; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 ; (the caller-allocated part of the frame) + 224. 
define void @f1(float *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: aghi %r15, -224 ; CHECK: .cfi_def_cfa_offset 384 ; CHECK: std %f8, 216(%r15) @@ -91,7 +91,7 @@ define void @f1(float *%ptr) { ; Like f1, but requires one fewer FPR. We allocate in numerical order, ; so %f15 is the one that gets dropped. define void @f2(float *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: aghi %r15, -216 ; CHECK: .cfi_def_cfa_offset 376 ; CHECK: std %f8, 208(%r15) @@ -169,7 +169,7 @@ define void @f2(float *%ptr) { ; Like f1, but should require only one call-saved FPR. define void @f3(float *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: aghi %r15, -168 ; CHECK: .cfi_def_cfa_offset 328 ; CHECK: std %f8, 160(%r15) @@ -218,7 +218,7 @@ define void @f3(float *%ptr) { ; This function should use all call-clobbered FPRs but no call-saved ones. ; It shouldn't need to create a frame. define void @f4(float *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: %r15 ; CHECK-NOT: %f8 ; CHECK-NOT: %f9 diff --git a/test/CodeGen/SystemZ/frame-03.ll b/test/CodeGen/SystemZ/frame-03.ll index 3c4a49977a123..db146c7c985df 100644 --- a/test/CodeGen/SystemZ/frame-03.ll +++ b/test/CodeGen/SystemZ/frame-03.ll @@ -9,7 +9,7 @@ ; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 ; (the caller-allocated part of the frame) + 224. define void @f1(double *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: aghi %r15, -224 ; CHECK: .cfi_def_cfa_offset 384 ; CHECK: std %f8, 216(%r15) @@ -93,7 +93,7 @@ define void @f1(double *%ptr) { ; Like f1, but requires one fewer FPR. We allocate in numerical order, ; so %f15 is the one that gets dropped. define void @f2(double *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: aghi %r15, -216 ; CHECK: .cfi_def_cfa_offset 376 ; CHECK: std %f8, 208(%r15) @@ -171,7 +171,7 @@ define void @f2(double *%ptr) { ; Like f1, but should require only one call-saved FPR. 
define void @f3(double *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: aghi %r15, -168 ; CHECK: .cfi_def_cfa_offset 328 ; CHECK: std %f8, 160(%r15) @@ -220,7 +220,7 @@ define void @f3(double *%ptr) { ; This function should use all call-clobbered FPRs but no call-saved ones. ; It shouldn't need to create a frame. define void @f4(double *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: %r15 ; CHECK-NOT: %f8 ; CHECK-NOT: %f9 diff --git a/test/CodeGen/SystemZ/frame-04.ll b/test/CodeGen/SystemZ/frame-04.ll index 360f85cde322b..93c59a3bc15fc 100644 --- a/test/CodeGen/SystemZ/frame-04.ll +++ b/test/CodeGen/SystemZ/frame-04.ll @@ -8,7 +8,7 @@ ; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 ; (the caller-allocated part of the frame) + 224. define void @f1(fp128 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: aghi %r15, -224 ; CHECK: .cfi_def_cfa_offset 384 ; CHECK: std %f8, 216(%r15) @@ -68,7 +68,7 @@ define void @f1(fp128 *%ptr) { ; Like f1, but requires one fewer FPR pair. We allocate in numerical order, ; so %f13+%f15 is the pair that gets dropped. define void @f2(fp128 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: aghi %r15, -208 ; CHECK: .cfi_def_cfa_offset 368 ; CHECK: std %f8, 200(%r15) @@ -121,7 +121,7 @@ define void @f2(fp128 *%ptr) { ; Like f1, but requires only one call-saved FPR pair. We allocate in ; numerical order so the pair should be %f8+%f10. define void @f3(fp128 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: aghi %r15, -176 ; CHECK: .cfi_def_cfa_offset 336 ; CHECK: std %f8, 168(%r15) @@ -160,7 +160,7 @@ define void @f3(fp128 *%ptr) { ; This function should use all call-clobbered FPRs but no call-saved ones. ; It shouldn't need to create a frame. 
define void @f4(fp128 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: %r15 ; CHECK-NOT: %f8 ; CHECK-NOT: %f9 diff --git a/test/CodeGen/SystemZ/frame-05.ll b/test/CodeGen/SystemZ/frame-05.ll index 3a159fcd5941c..f95284deeb798 100644 --- a/test/CodeGen/SystemZ/frame-05.ll +++ b/test/CodeGen/SystemZ/frame-05.ll @@ -14,7 +14,7 @@ ; Use a different address for the final store, so that we can check that ; %r15 isn't referenced again until after that. define void @f1(i32 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stmg %r6, %r15, 48(%r15) ; CHECK-NOT: %r15 ; CHECK: .cfi_offset %r6, -112 @@ -82,7 +82,7 @@ define void @f1(i32 *%ptr) { ; from %r14 down, so that the STMG/LMG sequences aren't any longer than ; they need to be. define void @f2(i32 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: stmg %r7, %r15, 56(%r15) ; CHECK-NOT: %r15 ; CHECK: .cfi_offset %r7, -104 @@ -145,7 +145,7 @@ define void @f2(i32 *%ptr) { ; Like f1, but only needs one call-saved GPR, which ought to be %r14. define void @f3(i32 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: stmg %r14, %r15, 112(%r15) ; CHECK-NOT: %r15 ; CHECK: .cfi_offset %r14, -48 @@ -188,7 +188,7 @@ define void @f3(i32 *%ptr) { ; This function should use all call-clobbered GPRs but no call-saved ones. ; It shouldn't need to touch the stack at all. define void @f4(i32 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: %r15 ; CHECK-NOT: %r6 ; CHECK-NOT: %r7 diff --git a/test/CodeGen/SystemZ/frame-06.ll b/test/CodeGen/SystemZ/frame-06.ll index 4c361f1e9fc91..ad22f10903adf 100644 --- a/test/CodeGen/SystemZ/frame-06.ll +++ b/test/CodeGen/SystemZ/frame-06.ll @@ -11,7 +11,7 @@ ; Use a different address for the final store, so that we can check that ; %r15 isn't referenced again until after that. 
define void @f1(i64 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stmg %r6, %r15, 48(%r15) ; CHECK-NOT: %r15 ; CHECK: .cfi_offset %r6, -112 @@ -79,7 +79,7 @@ define void @f1(i64 *%ptr) { ; from %r14 down, so that the STMG/LMG sequences aren't any longer than ; they need to be. define void @f2(i64 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: stmg %r7, %r15, 56(%r15) ; CHECK-NOT: %r15 ; CHECK: .cfi_offset %r7, -104 @@ -142,7 +142,7 @@ define void @f2(i64 *%ptr) { ; Like f1, but only needs one call-saved GPR, which ought to be %r14. define void @f3(i64 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: stmg %r14, %r15, 112(%r15) ; CHECK-NOT: %r15 ; CHECK: .cfi_offset %r14, -48 @@ -185,7 +185,7 @@ define void @f3(i64 *%ptr) { ; This function should use all call-clobbered GPRs but no call-saved ones. ; It shouldn't need to touch the stack at all. define void @f4(i64 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: %r15 ; CHECK-NOT: %r6 ; CHECK-NOT: %r7 diff --git a/test/CodeGen/SystemZ/frame-07.ll b/test/CodeGen/SystemZ/frame-07.ll index cfe9f868c07b3..eab313744b943 100644 --- a/test/CodeGen/SystemZ/frame-07.ll +++ b/test/CodeGen/SystemZ/frame-07.ll @@ -5,11 +5,11 @@ ; Test a frame size that requires some FPRs to be saved and loaded using ; the 20-bit STDY and LDY while others can use the 12-bit STD and LD. -; The frame is big enough to require an emergency spill slot at 160(%r15), +; The frame is big enough to require two emergency spill slots at 160(%r15), ; as well as the 8 FPR save slots. Get a frame of size 4128 by allocating -; (4128 - 168 - 8 * 8) / 8 = 487 extra doublewords. +; (4128 - 176 - 8 * 8) / 8 = 486 extra doublewords. 
define void @f1(double *%ptr, i64 %x) { -; CHECK-NOFP: f1: +; CHECK-NOFP-LABEL: f1: ; CHECK-NOFP: aghi %r15, -4128 ; CHECK-NOFP: .cfi_def_cfa_offset 4288 ; CHECK-NOFP: stdy %f8, 4120(%r15) @@ -40,7 +40,7 @@ define void @f1(double *%ptr, i64 %x) { ; CHECK-NOFP: aghi %r15, 4128 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f1: +; CHECK-FP-LABEL: f1: ; CHECK-FP: stmg %r11, %r15, 88(%r15) ; CHECK-FP: aghi %r15, -4128 ; CHECK-FP: .cfi_def_cfa_offset 4288 @@ -65,8 +65,8 @@ define void @f1(double *%ptr, i64 %x) { ; CHECK-FP: ld %f15, 4064(%r11) ; CHECK-FP: lmg %r11, %r15, 4216(%r11) ; CHECK-FP: br %r14 - %y = alloca [487 x i64], align 8 - %elem = getelementptr inbounds [487 x i64]* %y, i64 0, i64 0 + %y = alloca [486 x i64], align 8 + %elem = getelementptr inbounds [486 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %elem %l0 = load volatile double *%ptr %l1 = load volatile double *%ptr @@ -127,9 +127,9 @@ define void @f1(double *%ptr, i64 %x) { ; good optimisation but is really a different test. ; ; As above, get a frame of size 524320 by allocating -; (524320 - 168 - 8 * 8) / 8 = 65511 extra doublewords. +; (524320 - 176 - 8 * 8) / 8 = 65510 extra doublewords. 
define void @f2(double *%ptr, i64 %x) { -; CHECK-NOFP: f2: +; CHECK-NOFP-LABEL: f2: ; CHECK-NOFP: agfi %r15, -524320 ; CHECK-NOFP: .cfi_def_cfa_offset 524480 ; CHECK-NOFP: llilh [[INDEX:%r[1-5]]], 8 @@ -161,7 +161,7 @@ define void @f2(double *%ptr, i64 %x) { ; CHECK-NOFP: agfi %r15, 524320 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f2: +; CHECK-FP-LABEL: f2: ; CHECK-FP: stmg %r11, %r15, 88(%r15) ; CHECK-FP: agfi %r15, -524320 ; CHECK-FP: .cfi_def_cfa_offset 524480 @@ -194,8 +194,8 @@ define void @f2(double *%ptr, i64 %x) { ; CHECK-FP: aghi %r11, 128 ; CHECK-FP: lmg %r11, %r15, 524280(%r11) ; CHECK-FP: br %r14 - %y = alloca [65511 x i64], align 8 - %elem = getelementptr inbounds [65511 x i64]* %y, i64 0, i64 0 + %y = alloca [65510 x i64], align 8 + %elem = getelementptr inbounds [65510 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %elem %l0 = load volatile double *%ptr %l1 = load volatile double *%ptr diff --git a/test/CodeGen/SystemZ/frame-08.ll b/test/CodeGen/SystemZ/frame-08.ll index 6cf6378268f44..da2a6142fb47f 100644 --- a/test/CodeGen/SystemZ/frame-08.ll +++ b/test/CodeGen/SystemZ/frame-08.ll @@ -3,11 +3,11 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; This is the largest frame size that can use a plain LMG for %r6 and above. -; It is big enough to require an emergency spill slot at 160(%r15), -; so get a frame of size 524232 by allocating (524232 - 168) / 8 = 65508 +; It is big enough to require two emergency spill slots at 160(%r15), +; so get a frame of size 524232 by allocating (524232 - 176) / 8 = 65507 ; extra doublewords. 
define void @f1(i32 *%ptr, i64 %x) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stmg %r6, %r15, 48(%r15) ; CHECK: .cfi_offset %r6, -112 ; CHECK: .cfi_offset %r7, -104 @@ -64,18 +64,18 @@ define void @f1(i32 *%ptr, i64 %x) { store volatile i32 %add12, i32 *%ptr store volatile i32 %add13, i32 *%ptr store volatile i32 %add14, i32 *%ptr - %y = alloca [65508 x i64], align 8 - %entry = getelementptr inbounds [65508 x i64]* %y, i64 0, i64 0 + %y = alloca [65507 x i64], align 8 + %entry = getelementptr inbounds [65507 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %entry ret void } ; This is the largest frame size that can use a plain LMG for %r14 and above -; It is big enough to require an emergency spill slot at 160(%r15), -; so get a frame of size 524168 by allocating (524168 - 168) / 8 = 65500 +; It is big enough to require two emergency spill slots at 160(%r15), +; so get a frame of size 524168 by allocating (524168 - 176) / 8 = 65499 ; extra doublewords. define void @f2(i32 *%ptr, i64 %x) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: stmg %r14, %r15, 112(%r15) ; CHECK: .cfi_offset %r14, -48 ; CHECK: .cfi_offset %r15, -40 @@ -100,8 +100,8 @@ define void @f2(i32 *%ptr, i64 %x) { store volatile i32 %add4, i32 *%ptr store volatile i32 %add5, i32 *%ptr store volatile i32 %add14, i32 *%ptr - %y = alloca [65500 x i64], align 8 - %entry = getelementptr inbounds [65500 x i64]* %y, i64 0, i64 0 + %y = alloca [65499 x i64], align 8 + %entry = getelementptr inbounds [65499 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %entry ret void } @@ -110,7 +110,7 @@ define void @f2(i32 *%ptr, i64 %x) { ; frame size that needs two instructions to perform the final LMG for ; %r6 and above. 
define void @f3(i32 *%ptr, i64 %x) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: stmg %r6, %r15, 48(%r15) ; CHECK: .cfi_offset %r6, -112 ; CHECK: .cfi_offset %r7, -104 @@ -167,8 +167,8 @@ define void @f3(i32 *%ptr, i64 %x) { store volatile i32 %add12, i32 *%ptr store volatile i32 %add13, i32 *%ptr store volatile i32 %add14, i32 *%ptr - %y = alloca [65509 x i64], align 8 - %entry = getelementptr inbounds [65509 x i64]* %y, i64 0, i64 0 + %y = alloca [65508 x i64], align 8 + %entry = getelementptr inbounds [65508 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %entry ret void } @@ -177,7 +177,7 @@ define void @f3(i32 *%ptr, i64 %x) { ; frame size that needs two instructions to perform the final LMG for ; %r14 and %r15. define void @f4(i32 *%ptr, i64 %x) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: stmg %r14, %r15, 112(%r15) ; CHECK: .cfi_offset %r14, -48 ; CHECK: .cfi_offset %r15, -40 @@ -202,8 +202,8 @@ define void @f4(i32 *%ptr, i64 %x) { store volatile i32 %add4, i32 *%ptr store volatile i32 %add5, i32 *%ptr store volatile i32 %add14, i32 *%ptr - %y = alloca [65501 x i64], align 8 - %entry = getelementptr inbounds [65501 x i64]* %y, i64 0, i64 0 + %y = alloca [65500 x i64], align 8 + %entry = getelementptr inbounds [65500 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %entry ret void } @@ -211,7 +211,7 @@ define void @f4(i32 *%ptr, i64 %x) { ; This is the largest frame size for which the prepatory increment for ; "lmg %r14, %r15, ..." can be done using AGHI. 
define void @f5(i32 *%ptr, i64 %x) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: stmg %r14, %r15, 112(%r15) ; CHECK: .cfi_offset %r14, -48 ; CHECK: .cfi_offset %r15, -40 @@ -236,8 +236,8 @@ define void @f5(i32 *%ptr, i64 %x) { store volatile i32 %add4, i32 *%ptr store volatile i32 %add5, i32 *%ptr store volatile i32 %add14, i32 *%ptr - %y = alloca [69595 x i64], align 8 - %entry = getelementptr inbounds [69595 x i64]* %y, i64 0, i64 0 + %y = alloca [69594 x i64], align 8 + %entry = getelementptr inbounds [69594 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %entry ret void } @@ -245,7 +245,7 @@ define void @f5(i32 *%ptr, i64 %x) { ; This is the smallest frame size for which the prepatory increment for ; "lmg %r14, %r15, ..." needs to be done using AGFI. define void @f6(i32 *%ptr, i64 %x) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: stmg %r14, %r15, 112(%r15) ; CHECK: .cfi_offset %r14, -48 ; CHECK: .cfi_offset %r15, -40 @@ -270,8 +270,8 @@ define void @f6(i32 *%ptr, i64 %x) { store volatile i32 %add4, i32 *%ptr store volatile i32 %add5, i32 *%ptr store volatile i32 %add14, i32 *%ptr - %y = alloca [69596 x i64], align 8 - %entry = getelementptr inbounds [69596 x i64]* %y, i64 0, i64 0 + %y = alloca [69595 x i64], align 8 + %entry = getelementptr inbounds [69595 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %entry ret void } diff --git a/test/CodeGen/SystemZ/frame-09.ll b/test/CodeGen/SystemZ/frame-09.ll index eac633623c5f1..8a4f99c343a04 100644 --- a/test/CodeGen/SystemZ/frame-09.ll +++ b/test/CodeGen/SystemZ/frame-09.ll @@ -6,7 +6,7 @@ ; We don't need to allocate any more than the caller-provided 160-byte ; area though. define i32 @f1(i32 %x) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stmg %r11, %r15, 88(%r15) ; CHECK: .cfi_offset %r11, -72 ; CHECK: .cfi_offset %r15, -40 @@ -22,7 +22,7 @@ define i32 @f1(i32 %x) { ; Make sure that frame accesses after the initial allocation are relative ; to %r11 rather than %r15. 
define void @f2(i64 %x) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: stmg %r11, %r15, 88(%r15) ; CHECK: .cfi_offset %r11, -72 ; CHECK: .cfi_offset %r15, -40 @@ -41,7 +41,7 @@ define void @f2(i64 %x) { ; This function should require all GPRs but no other spill slots. ; It shouldn't need to allocate its own frame. define void @f3(i32 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: stmg %r6, %r15, 48(%r15) ; CHECK-NOT: %r15 ; CHECK-NOT: %r11 @@ -107,11 +107,11 @@ define void @f3(i32 *%ptr) { ret void } -; The largest frame for which the LMG is in range. This frame has an -; emergency spill slot at 160(%r11), so create a frame of size 524192 -; by allocating (524192 - 168) / 8 = 65503 doublewords. +; The largest frame for which the LMG is in range. This frame has two +; emergency spill slots at 160(%r11), so create a frame of size 524192 +; by allocating (524192 - 176) / 8 = 65502 doublewords. define void @f4(i64 %x) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: stmg %r11, %r15, 88(%r15) ; CHECK: .cfi_offset %r11, -72 ; CHECK: .cfi_offset %r15, -40 @@ -119,19 +119,19 @@ define void @f4(i64 %x) { ; CHECK: .cfi_def_cfa_offset 524352 ; CHECK: lgr %r11, %r15 ; CHECK: .cfi_def_cfa_register %r11 -; CHECK: stg %r2, 168(%r11) +; CHECK: stg %r2, 176(%r11) ; CHECK-NOT: ag ; CHECK: lmg %r11, %r15, 524280(%r11) ; CHECK: br %r14 - %y = alloca [65503 x i64], align 8 - %ptr = getelementptr inbounds [65503 x i64]* %y, i64 0, i64 0 + %y = alloca [65502 x i64], align 8 + %ptr = getelementptr inbounds [65502 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %ptr ret void } ; The next frame size larger than f4. 
define void @f5(i64 %x) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: stmg %r11, %r15, 88(%r15) ; CHECK: .cfi_offset %r11, -72 ; CHECK: .cfi_offset %r15, -40 @@ -139,12 +139,12 @@ define void @f5(i64 %x) { ; CHECK: .cfi_def_cfa_offset 524360 ; CHECK: lgr %r11, %r15 ; CHECK: .cfi_def_cfa_register %r11 -; CHECK: stg %r2, 168(%r11) +; CHECK: stg %r2, 176(%r11) ; CHECK: aghi %r11, 8 ; CHECK: lmg %r11, %r15, 524280(%r11) ; CHECK: br %r14 - %y = alloca [65504 x i64], align 8 - %ptr = getelementptr inbounds [65504 x i64]* %y, i64 0, i64 0 + %y = alloca [65503 x i64], align 8 + %ptr = getelementptr inbounds [65503 x i64]* %y, i64 0, i64 0 store volatile i64 %x, i64* %ptr ret void } diff --git a/test/CodeGen/SystemZ/frame-10.ll b/test/CodeGen/SystemZ/frame-10.ll index 399a4125933d6..b96973a9cb9df 100644 --- a/test/CodeGen/SystemZ/frame-10.ll +++ b/test/CodeGen/SystemZ/frame-10.ll @@ -5,7 +5,7 @@ declare i8 *@llvm.stacksave() define void @f1(i8 **%dest) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stg %r15, 0(%r2) ; CHECK: br %r14 %addr = call i8 *@llvm.stacksave() diff --git a/test/CodeGen/SystemZ/frame-11.ll b/test/CodeGen/SystemZ/frame-11.ll index 84222056e6d02..5145b4d1c8628 100644 --- a/test/CodeGen/SystemZ/frame-11.ll +++ b/test/CodeGen/SystemZ/frame-11.ll @@ -7,7 +7,7 @@ declare void @llvm.stackrestore(i8 *) ; we should use a frame pointer and tear down the frame based on %r11 ; rather than %r15. define void @f1(i8 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stmg %r11, %r15, 88(%r15) ; CHECK: lgr %r11, %r15 ; CHECK: lgr %r15, %r2 diff --git a/test/CodeGen/SystemZ/frame-13.ll b/test/CodeGen/SystemZ/frame-13.ll index fa6b845ea6f25..393850fbf6179 100644 --- a/test/CodeGen/SystemZ/frame-13.ll +++ b/test/CodeGen/SystemZ/frame-13.ll @@ -1,8 +1,11 @@ ; Test the handling of base + 12-bit displacement addresses for large frames, -; in cases where no 20-bit form exists. +; in cases where no 20-bit form exists. 
The tests here assume z10 register +; pressure, without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \ +; RUN: FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \ +; RUN: FileCheck -check-prefix=CHECK-FP %s ; This file tests what happens when a displacement is converted from ; being relative to the start of a frame object to being relative to @@ -17,22 +20,22 @@ ; First check the highest in-range offset after conversion, which is 4092 ; for word-addressing instructions like MVHI. ; -; The last in-range doubleword offset is 4088. Since the frame has an -; emergency spill slot at 160(%r15), the amount that we need to allocate -; in order to put another object at offset 4088 is (4088 - 168) / 4 = 980 +; The last in-range doubleword offset is 4088. Since the frame has two +; emergency spill slots at 160(%r15), the amount that we need to allocate +; in order to put another object at offset 4088 is (4088 - 176) / 4 = 978 ; words. 
define void @f1() { -; CHECK-NOFP: f1: +; CHECK-NOFP-LABEL: f1: ; CHECK-NOFP: mvhi 4092(%r15), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f1: +; CHECK-FP-LABEL: f1: ; CHECK-FP: mvhi 4092(%r11), 42 ; CHECK-FP: br %r14 - %region1 = alloca [980 x i32], align 8 - %region2 = alloca [980 x i32], align 8 - %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 1 - %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 1 + %region1 = alloca [978 x i32], align 8 + %region2 = alloca [978 x i32], align 8 + %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 1 + %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 1 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 ret void @@ -40,19 +43,19 @@ define void @f1() { ; Test the first out-of-range offset. We cannot use an index register here. define void @f2() { -; CHECK-NOFP: f2: +; CHECK-NOFP-LABEL: f2: ; CHECK-NOFP: lay %r1, 4096(%r15) ; CHECK-NOFP: mvhi 0(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f2: +; CHECK-FP-LABEL: f2: ; CHECK-FP: lay %r1, 4096(%r11) ; CHECK-FP: mvhi 0(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [980 x i32], align 8 - %region2 = alloca [980 x i32], align 8 - %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2 - %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2 + %region1 = alloca [978 x i32], align 8 + %region2 = alloca [978 x i32], align 8 + %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 2 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 ret void @@ -60,19 +63,19 @@ define void @f2() { ; Test the next offset after that. 
define void @f3() { -; CHECK-NOFP: f3: +; CHECK-NOFP-LABEL: f3: ; CHECK-NOFP: lay %r1, 4096(%r15) ; CHECK-NOFP: mvhi 4(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f3: +; CHECK-FP-LABEL: f3: ; CHECK-FP: lay %r1, 4096(%r11) ; CHECK-FP: mvhi 4(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [980 x i32], align 8 - %region2 = alloca [980 x i32], align 8 - %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 3 - %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 3 + %region1 = alloca [978 x i32], align 8 + %region2 = alloca [978 x i32], align 8 + %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 3 + %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 3 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 ret void @@ -80,19 +83,19 @@ define void @f3() { ; Add 4096 bytes (1024 words) to the size of each object and repeat. define void @f4() { -; CHECK-NOFP: f4: +; CHECK-NOFP-LABEL: f4: ; CHECK-NOFP: lay %r1, 4096(%r15) ; CHECK-NOFP: mvhi 4092(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f4: +; CHECK-FP-LABEL: f4: ; CHECK-FP: lay %r1, 4096(%r11) ; CHECK-FP: mvhi 4092(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [2004 x i32], align 8 - %region2 = alloca [2004 x i32], align 8 - %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 1 - %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 1 + %region1 = alloca [2002 x i32], align 8 + %region2 = alloca [2002 x i32], align 8 + %ptr1 = getelementptr inbounds [2002 x i32]* %region1, i64 0, i64 1 + %ptr2 = getelementptr inbounds [2002 x i32]* %region2, i64 0, i64 1 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 ret void @@ -100,19 +103,19 @@ define void @f4() { ; ...as above. 
define void @f5() { -; CHECK-NOFP: f5: +; CHECK-NOFP-LABEL: f5: ; CHECK-NOFP: lay %r1, 8192(%r15) ; CHECK-NOFP: mvhi 0(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f5: +; CHECK-FP-LABEL: f5: ; CHECK-FP: lay %r1, 8192(%r11) ; CHECK-FP: mvhi 0(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [2004 x i32], align 8 - %region2 = alloca [2004 x i32], align 8 - %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 2 - %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 2 + %region1 = alloca [2002 x i32], align 8 + %region2 = alloca [2002 x i32], align 8 + %ptr1 = getelementptr inbounds [2002 x i32]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [2002 x i32]* %region2, i64 0, i64 2 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 ret void @@ -120,41 +123,41 @@ define void @f5() { ; ...as above. define void @f6() { -; CHECK-NOFP: f6: +; CHECK-NOFP-LABEL: f6: ; CHECK-NOFP: lay %r1, 8192(%r15) ; CHECK-NOFP: mvhi 4(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f6: +; CHECK-FP-LABEL: f6: ; CHECK-FP: lay %r1, 8192(%r11) ; CHECK-FP: mvhi 4(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [2004 x i32], align 8 - %region2 = alloca [2004 x i32], align 8 - %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 3 - %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 3 + %region1 = alloca [2002 x i32], align 8 + %region2 = alloca [2002 x i32], align 8 + %ptr1 = getelementptr inbounds [2002 x i32]* %region1, i64 0, i64 3 + %ptr2 = getelementptr inbounds [2002 x i32]* %region2, i64 0, i64 3 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 ret void } ; Now try an offset of 4092 from the start of the object, with the object -; being at offset 8192. This time we need objects of (8192 - 168) / 4 = 2006 +; being at offset 8192. This time we need objects of (8192 - 176) / 4 = 2004 ; words. 
define void @f7() { -; CHECK-NOFP: f7: +; CHECK-NOFP-LABEL: f7: ; CHECK-NOFP: lay %r1, 8192(%r15) ; CHECK-NOFP: mvhi 4092(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f7: +; CHECK-FP-LABEL: f7: ; CHECK-FP: lay %r1, 8192(%r11) ; CHECK-FP: mvhi 4092(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [2006 x i32], align 8 - %region2 = alloca [2006 x i32], align 8 - %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1023 - %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1023 + %region1 = alloca [2004 x i32], align 8 + %region2 = alloca [2004 x i32], align 8 + %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 1023 + %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 1023 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 ret void @@ -163,72 +166,71 @@ define void @f7() { ; Keep the object-relative offset the same but bump the size of the ; objects by one doubleword. define void @f8() { -; CHECK-NOFP: f8: +; CHECK-NOFP-LABEL: f8: ; CHECK-NOFP: lay %r1, 12288(%r15) ; CHECK-NOFP: mvhi 4(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f8: +; CHECK-FP-LABEL: f8: ; CHECK-FP: lay %r1, 12288(%r11) ; CHECK-FP: mvhi 4(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [2008 x i32], align 8 - %region2 = alloca [2008 x i32], align 8 - %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1023 - %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1023 + %region1 = alloca [2006 x i32], align 8 + %region2 = alloca [2006 x i32], align 8 + %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1023 + %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1023 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 ret void } ; Check a case where the original displacement is out of range. The backend -; should force an LAY from the outset. We don't yet do any kind of anchor -; optimization, so there should be no offset on the MVHI itself. 
+; should force STY to be used instead. define void @f9() { -; CHECK-NOFP: f9: -; CHECK-NOFP: lay %r1, 12296(%r15) -; CHECK-NOFP: mvhi 0(%r1), 42 +; CHECK-NOFP-LABEL: f9: +; CHECK-NOFP: lhi [[TMP:%r[0-5]]], 42 +; CHECK-NOFP: sty [[TMP]], 12296(%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f9: -; CHECK-FP: lay %r1, 12296(%r11) -; CHECK-FP: mvhi 0(%r1), 42 +; CHECK-FP-LABEL: f9: +; CHECK-FP: lhi [[TMP:%r[0-5]]], 42 +; CHECK-FP: sty [[TMP]], 12296(%r11) ; CHECK-FP: br %r14 - %region1 = alloca [2008 x i32], align 8 - %region2 = alloca [2008 x i32], align 8 - %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1024 - %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1024 + %region1 = alloca [2006 x i32], align 8 + %region2 = alloca [2006 x i32], align 8 + %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1024 + %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1024 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 ret void } -; Repeat f2 in a case that needs the emergency spill slot (because all +; Repeat f2 in a case that needs the emergency spill slots (because all ; call-clobbered registers are live and no call-saved ones have been ; allocated). 
define void @f10(i32 *%vptr) { -; CHECK-NOFP: f10: -; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP-LABEL: f10: +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15) ; CHECK-NOFP: lay [[REGISTER]], 4096(%r15) ; CHECK-NOFP: mvhi 0([[REGISTER]]), 42 -; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f10: -; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP-LABEL: f10: +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11) ; CHECK-FP: lay [[REGISTER]], 4096(%r11) ; CHECK-FP: mvhi 0([[REGISTER]]), 42 -; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11) ; CHECK-FP: br %r14 %i0 = load volatile i32 *%vptr %i1 = load volatile i32 *%vptr %i3 = load volatile i32 *%vptr %i4 = load volatile i32 *%vptr %i5 = load volatile i32 *%vptr - %region1 = alloca [980 x i32], align 8 - %region2 = alloca [980 x i32], align 8 - %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2 - %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2 + %region1 = alloca [978 x i32], align 8 + %region2 = alloca [978 x i32], align 8 + %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 2 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 store volatile i32 %i0, i32 *%vptr @@ -239,26 +241,26 @@ define void @f10(i32 *%vptr) { ret void } -; And again with maximum register pressure. The only spill slot that the -; NOFP case needs is the emergency one, so the offsets are the same as for f2. +; And again with maximum register pressure. The only spill slots that the +; NOFP case needs are the emergency ones, so the offsets are the same as for f2. ; However, the FP case uses %r11 as the frame pointer and must therefore ; spill a second register. This leads to an extra displacement of 8. 
define void @f11(i32 *%vptr) { -; CHECK-NOFP: f11: +; CHECK-NOFP-LABEL: f11: ; CHECK-NOFP: stmg %r6, %r15, -; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15) ; CHECK-NOFP: lay [[REGISTER]], 4096(%r15) ; CHECK-NOFP: mvhi 0([[REGISTER]]), 42 -; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15) ; CHECK-NOFP: lmg %r6, %r15, ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f11: +; CHECK-FP-LABEL: f11: ; CHECK-FP: stmg %r6, %r15, -; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11) ; CHECK-FP: lay [[REGISTER]], 4096(%r11) ; CHECK-FP: mvhi 8([[REGISTER]]), 42 -; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11) ; CHECK-FP: lmg %r6, %r15, ; CHECK-FP: br %r14 %i0 = load volatile i32 *%vptr @@ -275,10 +277,10 @@ define void @f11(i32 *%vptr) { %i12 = load volatile i32 *%vptr %i13 = load volatile i32 *%vptr %i14 = load volatile i32 *%vptr - %region1 = alloca [980 x i32], align 8 - %region2 = alloca [980 x i32], align 8 - %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2 - %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2 + %region1 = alloca [978 x i32], align 8 + %region2 = alloca [978 x i32], align 8 + %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 2 store volatile i32 42, i32 *%ptr1 store volatile i32 42, i32 *%ptr2 store volatile i32 %i0, i32 *%vptr diff --git a/test/CodeGen/SystemZ/frame-14.ll b/test/CodeGen/SystemZ/frame-14.ll index d8ff0a54a7613..3b48179c40b6e 100644 --- a/test/CodeGen/SystemZ/frame-14.ll +++ b/test/CodeGen/SystemZ/frame-14.ll @@ -1,9 +1,13 @@ ; Test the handling of base + displacement addresses for large frames, ; in cases where both 12-bit and 20-bit displacements are allowed. 
+; The tests here assume z10 register pressure, without the high words +; being available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \ +; RUN: FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \ +; RUN: FileCheck -check-prefix=CHECK-FP %s ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s - ; This file tests what happens when a displacement is converted from ; being relative to the start of a frame object to being relative to ; the frame itself. In some cases the test is only possible if two @@ -16,21 +20,21 @@ ; First check the highest offset that is in range of the 12-bit form. ; -; The last in-range doubleword offset is 4088. Since the frame has an -; emergency spill slot at 160(%r15), the amount that we need to allocate -; in order to put another object at offset 4088 is 4088 - 168 = 3920 bytes. +; The last in-range doubleword offset is 4088. Since the frame has two +; emergency spill slots at 160(%r15), the amount that we need to allocate +; in order to put another object at offset 4088 is 4088 - 176 = 3912 bytes. 
define void @f1() { -; CHECK-NOFP: f1: +; CHECK-NOFP-LABEL: f1: ; CHECK-NOFP: mvi 4095(%r15), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f1: +; CHECK-FP-LABEL: f1: ; CHECK-FP: mvi 4095(%r11), 42 ; CHECK-FP: br %r14 - %region1 = alloca [3920 x i8], align 8 - %region2 = alloca [3920 x i8], align 8 - %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 7 - %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 7 + %region1 = alloca [3912 x i8], align 8 + %region2 = alloca [3912 x i8], align 8 + %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 7 + %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 7 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 ret void @@ -38,17 +42,17 @@ define void @f1() { ; Test the first offset that is out-of-range of the 12-bit form. define void @f2() { -; CHECK-NOFP: f2: +; CHECK-NOFP-LABEL: f2: ; CHECK-NOFP: mviy 4096(%r15), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f2: +; CHECK-FP-LABEL: f2: ; CHECK-FP: mviy 4096(%r11), 42 ; CHECK-FP: br %r14 - %region1 = alloca [3920 x i8], align 8 - %region2 = alloca [3920 x i8], align 8 - %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 8 - %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 8 + %region1 = alloca [3912 x i8], align 8 + %region2 = alloca [3912 x i8], align 8 + %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 8 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 ret void @@ -57,19 +61,19 @@ define void @f2() { ; Test the last offset that is in range of the 20-bit form. ; ; The last in-range doubleword offset is 524280, so by the same reasoning -; as above, we need to allocate objects of 524280 - 168 = 524122 bytes. +; as above, we need to allocate objects of 524280 - 176 = 524104 bytes. 
define void @f3() { -; CHECK-NOFP: f3: +; CHECK-NOFP-LABEL: f3: ; CHECK-NOFP: mviy 524287(%r15), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f3: +; CHECK-FP-LABEL: f3: ; CHECK-FP: mviy 524287(%r11), 42 ; CHECK-FP: br %r14 - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 7 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 7 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 7 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 7 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 ret void @@ -79,21 +83,21 @@ define void @f3() { ; and the offset is also out of LAY's range, so expect a constant load ; followed by an addition. define void @f4() { -; CHECK-NOFP: f4: +; CHECK-NOFP-LABEL: f4: ; CHECK-NOFP: llilh %r1, 8 ; CHECK-NOFP: agr %r1, %r15 ; CHECK-NOFP: mvi 0(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f4: +; CHECK-FP-LABEL: f4: ; CHECK-FP: llilh %r1, 8 ; CHECK-FP: agr %r1, %r11 ; CHECK-FP: mvi 0(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 ret void @@ -102,21 +106,21 @@ define void @f4() { ; Add 4095 to the previous offset, to test the other end of the MVI range. ; The instruction will actually be STCY before frame lowering. 
define void @f5() { -; CHECK-NOFP: f5: +; CHECK-NOFP-LABEL: f5: ; CHECK-NOFP: llilh %r1, 8 ; CHECK-NOFP: agr %r1, %r15 ; CHECK-NOFP: mvi 4095(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f5: +; CHECK-FP-LABEL: f5: ; CHECK-FP: llilh %r1, 8 ; CHECK-FP: agr %r1, %r11 ; CHECK-FP: mvi 4095(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4103 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4103 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4103 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4103 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 ret void @@ -124,21 +128,21 @@ define void @f5() { ; Test the next offset after that, which uses MVIY instead of MVI. define void @f6() { -; CHECK-NOFP: f6: +; CHECK-NOFP-LABEL: f6: ; CHECK-NOFP: llilh %r1, 8 ; CHECK-NOFP: agr %r1, %r15 ; CHECK-NOFP: mviy 4096(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f6: +; CHECK-FP-LABEL: f6: ; CHECK-FP: llilh %r1, 8 ; CHECK-FP: agr %r1, %r11 ; CHECK-FP: mviy 4096(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4104 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4104 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4104 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4104 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 ret void @@ -149,21 +153,21 @@ define void @f6() { ; anchors 0x10000 bytes apart, so that the high part can be loaded using ; LLILH while still using MVI in more cases than 0x40000 
anchors would. define void @f7() { -; CHECK-NOFP: f7: +; CHECK-NOFP-LABEL: f7: ; CHECK-NOFP: llilh %r1, 23 ; CHECK-NOFP: agr %r1, %r15 ; CHECK-NOFP: mviy 65535(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f7: +; CHECK-FP-LABEL: f7: ; CHECK-FP: llilh %r1, 23 ; CHECK-FP: agr %r1, %r11 ; CHECK-FP: mviy 65535(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [1048408 x i8], align 8 - %region2 = alloca [1048408 x i8], align 8 - %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287 - %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287 + %region1 = alloca [1048400 x i8], align 8 + %region2 = alloca [1048400 x i8], align 8 + %ptr1 = getelementptr inbounds [1048400 x i8]* %region1, i64 0, i64 524287 + %ptr2 = getelementptr inbounds [1048400 x i8]* %region2, i64 0, i64 524287 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 ret void @@ -172,21 +176,21 @@ define void @f7() { ; Keep the object-relative offset the same but bump the size of the ; objects by one doubleword. define void @f8() { -; CHECK-NOFP: f8: +; CHECK-NOFP-LABEL: f8: ; CHECK-NOFP: llilh %r1, 24 ; CHECK-NOFP: agr %r1, %r15 ; CHECK-NOFP: mvi 7(%r1), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f8: +; CHECK-FP-LABEL: f8: ; CHECK-FP: llilh %r1, 24 ; CHECK-FP: agr %r1, %r11 ; CHECK-FP: mvi 7(%r1), 42 ; CHECK-FP: br %r14 - %region1 = alloca [1048416 x i8], align 8 - %region2 = alloca [1048416 x i8], align 8 - %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524287 - %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524287 + %region1 = alloca [1048408 x i8], align 8 + %region2 = alloca [1048408 x i8], align 8 + %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287 + %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 ret void @@ -200,56 +204,56 @@ define void @f8() { ; The LA then gets lowered into the LLILH/LA form. 
The exact sequence ; isn't that important though. define void @f9() { -; CHECK-NOFP: f9: +; CHECK-NOFP-LABEL: f9: ; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16 ; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15) ; CHECK-NOFP: agfi [[R2]], 524288 ; CHECK-NOFP: mvi 0([[R2]]), 42 ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f9: +; CHECK-FP-LABEL: f9: ; CHECK-FP: llilh [[R1:%r[1-5]]], 16 ; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11) ; CHECK-FP: agfi [[R2]], 524288 ; CHECK-FP: mvi 0([[R2]]), 42 ; CHECK-FP: br %r14 - %region1 = alloca [1048416 x i8], align 8 - %region2 = alloca [1048416 x i8], align 8 - %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524288 - %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524288 + %region1 = alloca [1048408 x i8], align 8 + %region2 = alloca [1048408 x i8], align 8 + %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524288 + %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524288 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 ret void } -; Repeat f4 in a case that needs the emergency spill slot (because all +; Repeat f4 in a case that needs the emergency spill slots (because all ; call-clobbered registers are live and no call-saved ones have been ; allocated). 
define void @f10(i32 *%vptr) { -; CHECK-NOFP: f10: -; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP-LABEL: f10: +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15) ; CHECK-NOFP: llilh [[REGISTER]], 8 ; CHECK-NOFP: agr [[REGISTER]], %r15 ; CHECK-NOFP: mvi 0([[REGISTER]]), 42 -; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f10: -; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP-LABEL: f10: +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11) ; CHECK-FP: llilh [[REGISTER]], 8 ; CHECK-FP: agr [[REGISTER]], %r11 ; CHECK-FP: mvi 0([[REGISTER]]), 42 -; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11) ; CHECK-FP: br %r14 %i0 = load volatile i32 *%vptr %i1 = load volatile i32 *%vptr %i3 = load volatile i32 *%vptr %i4 = load volatile i32 *%vptr %i5 = load volatile i32 *%vptr - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 store volatile i32 %i0, i32 *%vptr @@ -260,28 +264,28 @@ define void @f10(i32 *%vptr) { ret void } -; And again with maximum register pressure. The only spill slot that the -; NOFP case needs is the emergency one, so the offsets are the same as for f4. +; And again with maximum register pressure. The only spill slots that the +; NOFP case needs are the emergency ones, so the offsets are the same as for f4. ; However, the FP case uses %r11 as the frame pointer and must therefore ; spill a second register. 
This leads to an extra displacement of 8. define void @f11(i32 *%vptr) { -; CHECK-NOFP: f11: +; CHECK-NOFP-LABEL: f11: ; CHECK-NOFP: stmg %r6, %r15, -; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15) ; CHECK-NOFP: llilh [[REGISTER]], 8 ; CHECK-NOFP: agr [[REGISTER]], %r15 ; CHECK-NOFP: mvi 0([[REGISTER]]), 42 -; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15) ; CHECK-NOFP: lmg %r6, %r15, ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f11: +; CHECK-FP-LABEL: f11: ; CHECK-FP: stmg %r6, %r15, -; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11) ; CHECK-FP: llilh [[REGISTER]], 8 ; CHECK-FP: agr [[REGISTER]], %r11 ; CHECK-FP: mvi 8([[REGISTER]]), 42 -; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11) ; CHECK-FP: lmg %r6, %r15, ; CHECK-FP: br %r14 %i0 = load volatile i32 *%vptr @@ -298,10 +302,10 @@ define void @f11(i32 *%vptr) { %i12 = load volatile i32 *%vptr %i13 = load volatile i32 *%vptr %i14 = load volatile i32 *%vptr - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8 store volatile i8 42, i8 *%ptr1 store volatile i8 42, i8 *%ptr2 store volatile i32 %i0, i32 *%vptr diff --git a/test/CodeGen/SystemZ/frame-15.ll b/test/CodeGen/SystemZ/frame-15.ll index bc87e174d0b68..b3c95e73c1af6 100644 --- a/test/CodeGen/SystemZ/frame-15.ll +++ b/test/CodeGen/SystemZ/frame-15.ll @@ -1,8 +1,11 @@ ; Test the handling of base + index + 12-bit displacement addresses for -; 
large frames, in cases where no 20-bit form exists. +; large frames, in cases where no 20-bit form exists. The tests here +; assume z10 register pressure, without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \ +; RUN: FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \ +; RUN: FileCheck -check-prefix=CHECK-FP %s declare void @foo(float *%ptr1, float *%ptr2) @@ -19,25 +22,25 @@ declare void @foo(float *%ptr1, float *%ptr2) ; First check the highest in-range offset after conversion, which is 4092 ; for word-addressing instructions like LDEB. ; -; The last in-range doubleword offset is 4088. Since the frame has an -; emergency spill slot at 160(%r15), the amount that we need to allocate -; in order to put another object at offset 4088 is (4088 - 168) / 4 = 980 +; The last in-range doubleword offset is 4088. Since the frame has two +; emergency spill slots at 160(%r15), the amount that we need to allocate +; in order to put another object at offset 4088 is (4088 - 176) / 4 = 978 ; words. 
define void @f1(double *%dst) { -; CHECK-NOFP: f1: +; CHECK-NOFP-LABEL: f1: ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f1: +; CHECK-FP-LABEL: f1: ; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r11) ; CHECK-FP: br %r14 - %region1 = alloca [980 x float], align 8 - %region2 = alloca [980 x float], align 8 - %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + %region1 = alloca [978 x float], align 8 + %region2 = alloca [978 x float], align 8 + %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 1 - %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 1 + %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 1 + %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 1 %float1 = load float *%ptr1 %float2 = load float *%ptr2 %double1 = fpext float %float1 to double @@ -49,22 +52,22 @@ define void @f1(double *%dst) { ; Test the first out-of-range offset. 
define void @f2(double *%dst) { -; CHECK-NOFP: f2: +; CHECK-NOFP-LABEL: f2: ; CHECK-NOFP: lghi %r1, 4096 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f2: +; CHECK-FP-LABEL: f2: ; CHECK-FP: lghi %r1, 4096 ; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [980 x float], align 8 - %region2 = alloca [980 x float], align 8 - %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + %region1 = alloca [978 x float], align 8 + %region2 = alloca [978 x float], align 8 + %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2 - %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2 + %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 2 %float1 = load float *%ptr1 %float2 = load float *%ptr2 %double1 = fpext float %float1 to double @@ -76,22 +79,22 @@ define void @f2(double *%dst) { ; Test the next offset after that. 
define void @f3(double *%dst) { -; CHECK-NOFP: f3: +; CHECK-NOFP-LABEL: f3: ; CHECK-NOFP: lghi %r1, 4096 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f3: +; CHECK-FP-LABEL: f3: ; CHECK-FP: lghi %r1, 4096 ; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [980 x float], align 8 - %region2 = alloca [980 x float], align 8 - %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + %region1 = alloca [978 x float], align 8 + %region2 = alloca [978 x float], align 8 + %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 3 - %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 3 + %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 3 + %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 3 %float1 = load float *%ptr1 %float2 = load float *%ptr2 %double1 = fpext float %float1 to double @@ -103,22 +106,22 @@ define void @f3(double *%dst) { ; Add 4096 bytes (1024 words) to the size of each object and repeat. 
define void @f4(double *%dst) { -; CHECK-NOFP: f4: +; CHECK-NOFP-LABEL: f4: ; CHECK-NOFP: lghi %r1, 4096 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f4: +; CHECK-FP-LABEL: f4: ; CHECK-FP: lghi %r1, 4096 ; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [2004 x float], align 8 - %region2 = alloca [2004 x float], align 8 - %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0 + %region1 = alloca [2002 x float], align 8 + %region2 = alloca [2002 x float], align 8 + %start1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 1 - %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 1 + %ptr1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 1 + %ptr2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 1 %float1 = load float *%ptr1 %float2 = load float *%ptr2 %double1 = fpext float %float1 to double @@ -130,22 +133,22 @@ define void @f4(double *%dst) { ; ...as above. 
define void @f5(double *%dst) { -; CHECK-NOFP: f5: +; CHECK-NOFP-LABEL: f5: ; CHECK-NOFP: lghi %r1, 8192 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f5: +; CHECK-FP-LABEL: f5: ; CHECK-FP: lghi %r1, 8192 ; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [2004 x float], align 8 - %region2 = alloca [2004 x float], align 8 - %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0 + %region1 = alloca [2002 x float], align 8 + %region2 = alloca [2002 x float], align 8 + %start1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 2 - %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 2 + %ptr1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 2 %float1 = load float *%ptr1 %float2 = load float *%ptr2 %double1 = fpext float %float1 to double @@ -157,22 +160,22 @@ define void @f5(double *%dst) { ; ...as above. 
define void @f6(double *%dst) { -; CHECK-NOFP: f6: +; CHECK-NOFP-LABEL: f6: ; CHECK-NOFP: lghi %r1, 8192 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f6: +; CHECK-FP-LABEL: f6: ; CHECK-FP: lghi %r1, 8192 ; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [2004 x float], align 8 - %region2 = alloca [2004 x float], align 8 - %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0 + %region1 = alloca [2002 x float], align 8 + %region2 = alloca [2002 x float], align 8 + %start1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 3 - %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 3 + %ptr1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 3 + %ptr2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 3 %float1 = load float *%ptr1 %float2 = load float *%ptr2 %double1 = fpext float %float1 to double @@ -183,25 +186,25 @@ define void @f6(double *%dst) { } ; Now try an offset of 4092 from the start of the object, with the object -; being at offset 8192. This time we need objects of (8192 - 168) / 4 = 2006 +; being at offset 8192. This time we need objects of (8192 - 168) / 4 = 2004 ; words. 
define void @f7(double *%dst) { -; CHECK-NOFP: f7: +; CHECK-NOFP-LABEL: f7: ; CHECK-NOFP: lghi %r1, 8192 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f7: +; CHECK-FP-LABEL: f7: ; CHECK-FP: lghi %r1, 8192 ; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [2006 x float], align 8 - %region2 = alloca [2006 x float], align 8 - %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0 + %region1 = alloca [2004 x float], align 8 + %region2 = alloca [2004 x float], align 8 + %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1023 - %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1023 + %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 1023 + %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 1023 %float1 = load float *%ptr1 %float2 = load float *%ptr2 %double1 = fpext float %float1 to double @@ -214,22 +217,22 @@ define void @f7(double *%dst) { ; Keep the object-relative offset the same but bump the size of the ; objects by one doubleword. 
define void @f8(double *%dst) { -; CHECK-NOFP: f8: +; CHECK-NOFP-LABEL: f8: ; CHECK-NOFP: lghi %r1, 12288 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f8: +; CHECK-FP-LABEL: f8: ; CHECK-FP: lghi %r1, 12288 ; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [2008 x float], align 8 - %region2 = alloca [2008 x float], align 8 - %start1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 0 + %region1 = alloca [2006 x float], align 8 + %region2 = alloca [2006 x float], align 8 + %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 1023 - %ptr2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 1023 + %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1023 + %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1023 %float1 = load float *%ptr1 %float2 = load float *%ptr2 %double1 = fpext float %float1 to double @@ -243,22 +246,22 @@ define void @f8(double *%dst) { ; should force an LAY from the outset. We don't yet do any kind of anchor ; optimization, so there should be no offset on the LDEB itself. 
define void @f9(double *%dst) { -; CHECK-NOFP: f9: +; CHECK-NOFP-LABEL: f9: ; CHECK-NOFP: lay %r1, 12296(%r15) ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f9: +; CHECK-FP-LABEL: f9: ; CHECK-FP: lay %r1, 12296(%r11) ; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1) ; CHECK-FP: br %r14 - %region1 = alloca [2008 x float], align 8 - %region2 = alloca [2008 x float], align 8 - %start1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 0 + %region1 = alloca [2006 x float], align 8 + %region2 = alloca [2006 x float], align 8 + %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 1024 - %ptr2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 1024 + %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1024 + %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1024 %float1 = load float *%ptr1 %float2 = load float *%ptr2 %double1 = fpext float %float1 to double @@ -268,31 +271,31 @@ define void @f9(double *%dst) { ret void } -; Repeat f2 in a case that needs the emergency spill slot, because all +; Repeat f2 in a case that needs the emergency spill slots, because all ; call-clobbered and allocated call-saved registers are live. Note that ; %vptr and %dst are copied to call-saved registers, freeing up %r2 and ; %r3 during the main test. 
define void @f10(i32 *%vptr, double *%dst) { -; CHECK-NOFP: f10: -; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP-LABEL: f10: +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15) ; CHECK-NOFP: lghi [[REGISTER]], 4096 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r15) -; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f10: -; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP-LABEL: f10: +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11) ; CHECK-FP: lghi [[REGISTER]], 4096 ; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r11) -; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11) ; CHECK-FP: br %r14 - %region1 = alloca [980 x float], align 8 - %region2 = alloca [980 x float], align 8 - %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + %region1 = alloca [978 x float], align 8 + %region2 = alloca [978 x float], align 8 + %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2 - %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2 + %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 2 %i0 = load volatile i32 *%vptr %i1 = load volatile i32 *%vptr %i2 = load volatile i32 *%vptr @@ -318,24 +321,24 @@ define void @f10(i32 *%vptr, double *%dst) { ; Repeat f2 in a case where the index register is already occupied. 
define void @f11(double *%dst, i64 %index) { -; CHECK-NOFP: f11: +; CHECK-NOFP-LABEL: f11: ; CHECK-NOFP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3 ; CHECK-NOFP: lay %r1, 4096(%r15) ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f11: +; CHECK-FP-LABEL: f11: ; CHECK-FP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3 ; CHECK-FP: lay %r1, 4096(%r11) ; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1) ; CHECK-FP: br %r14 - %region1 = alloca [980 x float], align 8 - %region2 = alloca [980 x float], align 8 - %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 - %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + %region1 = alloca [978 x float], align 8 + %region2 = alloca [978 x float], align 8 + %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0 call void @foo(float *%start1, float *%start2) - %elem1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2 - %elem2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2 + %elem1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 2 + %elem2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 2 %base1 = ptrtoint float *%elem1 to i64 %base2 = ptrtoint float *%elem2 to i64 %addr1 = add i64 %base1, %index diff --git a/test/CodeGen/SystemZ/frame-16.ll b/test/CodeGen/SystemZ/frame-16.ll index cc5529f920ca3..f7e2dfa351494 100644 --- a/test/CodeGen/SystemZ/frame-16.ll +++ b/test/CodeGen/SystemZ/frame-16.ll @@ -1,8 +1,12 @@ ; Test the handling of base + index + displacement addresses for large frames, ; in cases where both 12-bit and 20-bit displacements are allowed. +; The tests here assume z10 register pressure, without the high words +; being available. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \ +; RUN: FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \ +; RUN: FileCheck -check-prefix=CHECK-FP %s ; This file tests what happens when a displacement is converted from ; being relative to the start of a frame object to being relative to @@ -16,21 +20,21 @@ ; First check the highest offset that is in range of the 12-bit form. ; -; The last in-range doubleword offset is 4088. Since the frame has an -; emergency spill slot at 160(%r15), the amount that we need to allocate -; in order to put another object at offset 4088 is 4088 - 168 = 3920 bytes. +; The last in-range doubleword offset is 4088. Since the frame has two +; emergency spill slots at 160(%r15), the amount that we need to allocate +; in order to put another object at offset 4088 is 4088 - 176 = 3912 bytes. define void @f1(i8 %byte) { -; CHECK-NOFP: f1: +; CHECK-NOFP-LABEL: f1: ; CHECK-NOFP: stc %r2, 4095(%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f1: +; CHECK-FP-LABEL: f1: ; CHECK-FP: stc %r2, 4095(%r11) ; CHECK-FP: br %r14 - %region1 = alloca [3920 x i8], align 8 - %region2 = alloca [3920 x i8], align 8 - %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 7 - %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 7 + %region1 = alloca [3912 x i8], align 8 + %region2 = alloca [3912 x i8], align 8 + %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 7 + %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 7 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void @@ -38,17 +42,17 @@ define void @f1(i8 %byte) { ; Test the first offset that is out-of-range of the 12-bit form. 
define void @f2(i8 %byte) { -; CHECK-NOFP: f2: +; CHECK-NOFP-LABEL: f2: ; CHECK-NOFP: stcy %r2, 4096(%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f2: +; CHECK-FP-LABEL: f2: ; CHECK-FP: stcy %r2, 4096(%r11) ; CHECK-FP: br %r14 - %region1 = alloca [3920 x i8], align 8 - %region2 = alloca [3920 x i8], align 8 - %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 8 - %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 8 + %region1 = alloca [3912 x i8], align 8 + %region2 = alloca [3912 x i8], align 8 + %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 8 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void @@ -57,19 +61,19 @@ define void @f2(i8 %byte) { ; Test the last offset that is in range of the 20-bit form. ; ; The last in-range doubleword offset is 524280, so by the same reasoning -; as above, we need to allocate objects of 524280 - 168 = 524122 bytes. +; as above, we need to allocate objects of 524280 - 176 = 524104 bytes. define void @f3(i8 %byte) { -; CHECK-NOFP: f3: +; CHECK-NOFP-LABEL: f3: ; CHECK-NOFP: stcy %r2, 524287(%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f3: +; CHECK-FP-LABEL: f3: ; CHECK-FP: stcy %r2, 524287(%r11) ; CHECK-FP: br %r14 - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 7 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 7 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 7 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 7 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void @@ -79,19 +83,19 @@ define void @f3(i8 %byte) { ; and the offset is also out of LAY's range, so expect a constant load ; followed by an addition. 
define void @f4(i8 %byte) { -; CHECK-NOFP: f4: +; CHECK-NOFP-LABEL: f4: ; CHECK-NOFP: llilh %r1, 8 ; CHECK-NOFP: stc %r2, 0(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f4: +; CHECK-FP-LABEL: f4: ; CHECK-FP: llilh %r1, 8 ; CHECK-FP: stc %r2, 0(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void @@ -100,19 +104,19 @@ define void @f4(i8 %byte) { ; Add 4095 to the previous offset, to test the other end of the STC range. ; The instruction will actually be STCY before frame lowering. define void @f5(i8 %byte) { -; CHECK-NOFP: f5: +; CHECK-NOFP-LABEL: f5: ; CHECK-NOFP: llilh %r1, 8 ; CHECK-NOFP: stc %r2, 4095(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f5: +; CHECK-FP-LABEL: f5: ; CHECK-FP: llilh %r1, 8 ; CHECK-FP: stc %r2, 4095(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4103 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4103 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4103 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4103 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void @@ -120,19 +124,19 @@ define void @f5(i8 %byte) { ; Test the next offset after that, which uses STCY instead of STC. 
define void @f6(i8 %byte) { -; CHECK-NOFP: f6: +; CHECK-NOFP-LABEL: f6: ; CHECK-NOFP: llilh %r1, 8 ; CHECK-NOFP: stcy %r2, 4096(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f6: +; CHECK-FP-LABEL: f6: ; CHECK-FP: llilh %r1, 8 ; CHECK-FP: stcy %r2, 4096(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4104 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4104 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4104 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4104 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void @@ -143,19 +147,19 @@ define void @f6(i8 %byte) { ; anchors 0x10000 bytes apart, so that the high part can be loaded using ; LLILH while still using STC in more cases than 0x40000 anchors would. 
define void @f7(i8 %byte) { -; CHECK-NOFP: f7: +; CHECK-NOFP-LABEL: f7: ; CHECK-NOFP: llilh %r1, 23 ; CHECK-NOFP: stcy %r2, 65535(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f7: +; CHECK-FP-LABEL: f7: ; CHECK-FP: llilh %r1, 23 ; CHECK-FP: stcy %r2, 65535(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [1048408 x i8], align 8 - %region2 = alloca [1048408 x i8], align 8 - %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287 - %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287 + %region1 = alloca [1048400 x i8], align 8 + %region2 = alloca [1048400 x i8], align 8 + %ptr1 = getelementptr inbounds [1048400 x i8]* %region1, i64 0, i64 524287 + %ptr2 = getelementptr inbounds [1048400 x i8]* %region2, i64 0, i64 524287 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void @@ -164,19 +168,19 @@ define void @f7(i8 %byte) { ; Keep the object-relative offset the same but bump the size of the ; objects by one doubleword. define void @f8(i8 %byte) { -; CHECK-NOFP: f8: +; CHECK-NOFP-LABEL: f8: ; CHECK-NOFP: llilh %r1, 24 ; CHECK-NOFP: stc %r2, 7(%r1,%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f8: +; CHECK-FP-LABEL: f8: ; CHECK-FP: llilh %r1, 24 ; CHECK-FP: stc %r2, 7(%r1,%r11) ; CHECK-FP: br %r14 - %region1 = alloca [1048416 x i8], align 8 - %region2 = alloca [1048416 x i8], align 8 - %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524287 - %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524287 + %region1 = alloca [1048408 x i8], align 8 + %region2 = alloca [1048408 x i8], align 8 + %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287 + %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void @@ -190,53 +194,53 @@ define void @f8(i8 %byte) { ; The LA then gets lowered into the LLILH/LA form. The exact sequence ; isn't that important though. 
define void @f9(i8 %byte) { -; CHECK-NOFP: f9: +; CHECK-NOFP-LABEL: f9: ; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16 ; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15) ; CHECK-NOFP: agfi [[R2]], 524288 ; CHECK-NOFP: stc %r2, 0([[R2]]) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f9: +; CHECK-FP-LABEL: f9: ; CHECK-FP: llilh [[R1:%r[1-5]]], 16 ; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11) ; CHECK-FP: agfi [[R2]], 524288 ; CHECK-FP: stc %r2, 0([[R2]]) ; CHECK-FP: br %r14 - %region1 = alloca [1048416 x i8], align 8 - %region2 = alloca [1048416 x i8], align 8 - %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524288 - %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524288 + %region1 = alloca [1048408 x i8], align 8 + %region2 = alloca [1048408 x i8], align 8 + %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524288 + %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524288 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void } -; Repeat f4 in a case that needs the emergency spill slot (because all +; Repeat f4 in a case that needs the emergency spill slots (because all ; call-clobbered registers are live and no call-saved ones have been ; allocated). 
define void @f10(i32 *%vptr, i8 %byte) { -; CHECK-NOFP: f10: -; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP-LABEL: f10: +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15) ; CHECK-NOFP: llilh [[REGISTER]], 8 ; CHECK-NOFP: stc %r3, 0([[REGISTER]],%r15) -; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f10: -; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP-LABEL: f10: +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11) ; CHECK-FP: llilh [[REGISTER]], 8 ; CHECK-FP: stc %r3, 0([[REGISTER]],%r11) -; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11) ; CHECK-FP: br %r14 %i0 = load volatile i32 *%vptr %i1 = load volatile i32 *%vptr %i4 = load volatile i32 *%vptr %i5 = load volatile i32 *%vptr - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 store volatile i32 %i0, i32 *%vptr @@ -246,26 +250,26 @@ define void @f10(i32 *%vptr, i8 %byte) { ret void } -; And again with maximum register pressure. The only spill slot that the -; NOFP case needs is the emergency one, so the offsets are the same as for f4. +; And again with maximum register pressure. The only spill slots that the +; NOFP case needs are the emergency ones, so the offsets are the same as for f4. ; However, the FP case uses %r11 as the frame pointer and must therefore ; spill a second register. This leads to an extra displacement of 8. 
define void @f11(i32 *%vptr, i8 %byte) { -; CHECK-NOFP: f11: +; CHECK-NOFP-LABEL: f11: ; CHECK-NOFP: stmg %r6, %r15, -; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15) ; CHECK-NOFP: llilh [[REGISTER]], 8 ; CHECK-NOFP: stc %r3, 0([[REGISTER]],%r15) -; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15) ; CHECK-NOFP: lmg %r6, %r15, ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f11: +; CHECK-FP-LABEL: f11: ; CHECK-FP: stmg %r6, %r15, -; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11) ; CHECK-FP: llilh [[REGISTER]], 8 ; CHECK-FP: stc %r3, 8([[REGISTER]],%r11) -; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11) ; CHECK-FP: lmg %r6, %r15, ; CHECK-FP: br %r14 %i0 = load volatile i32 *%vptr @@ -281,10 +285,10 @@ define void @f11(i32 *%vptr, i8 %byte) { %i12 = load volatile i32 *%vptr %i13 = load volatile i32 *%vptr %i14 = load volatile i32 *%vptr - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 store volatile i32 %i0, i32 *%vptr @@ -305,22 +309,22 @@ define void @f11(i32 *%vptr, i8 %byte) { ; Repeat f4 in a case where the index register is already occupied. 
define void @f12(i8 %byte, i64 %index) { -; CHECK-NOFP: f12: +; CHECK-NOFP-LABEL: f12: ; CHECK-NOFP: llilh %r1, 8 ; CHECK-NOFP: agr %r1, %r15 ; CHECK-NOFP: stc %r2, 0(%r3,%r1) ; CHECK-NOFP: br %r14 ; -; CHECK-FP: f12: +; CHECK-FP-LABEL: f12: ; CHECK-FP: llilh %r1, 8 ; CHECK-FP: agr %r1, %r11 ; CHECK-FP: stc %r2, 0(%r3,%r1) ; CHECK-FP: br %r14 - %region1 = alloca [524112 x i8], align 8 - %region2 = alloca [524112 x i8], align 8 + %region1 = alloca [524104 x i8], align 8 + %region2 = alloca [524104 x i8], align 8 %index1 = add i64 %index, 8 - %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 %index1 - %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 %index1 + %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 %index1 + %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 %index1 store volatile i8 %byte, i8 *%ptr1 store volatile i8 %byte, i8 *%ptr2 ret void diff --git a/test/CodeGen/SystemZ/frame-17.ll b/test/CodeGen/SystemZ/frame-17.ll index 613d9f8795584..97cf83dfd78e2 100644 --- a/test/CodeGen/SystemZ/frame-17.ll +++ b/test/CodeGen/SystemZ/frame-17.ll @@ -6,7 +6,7 @@ ; 4-byte spill slot, rounded to 8 bytes. The frame size should be exactly ; 160 + 8 * 8 = 232. define void @f1(float *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: aghi %r15, -232 ; CHECK: std %f8, 224(%r15) ; CHECK: std %f9, 216(%r15) @@ -70,7 +70,7 @@ define void @f1(float *%ptr) { ; Same for doubles, except that the full spill slot is used. define void @f2(double *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: aghi %r15, -232 ; CHECK: std %f8, 224(%r15) ; CHECK: std %f9, 216(%r15) @@ -131,7 +131,7 @@ define void @f2(double *%ptr) { ; The long double case needs a 16-byte spill slot. 
define void @f3(fp128 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: aghi %r15, -240 ; CHECK: std %f8, 232(%r15) ; CHECK: std %f9, 224(%r15) diff --git a/test/CodeGen/SystemZ/frame-18.ll b/test/CodeGen/SystemZ/frame-18.ll index a9977ed04b423..21dfc1238a13a 100644 --- a/test/CodeGen/SystemZ/frame-18.ll +++ b/test/CodeGen/SystemZ/frame-18.ll @@ -1,11 +1,12 @@ -; Test spilling of GPRs. +; Test spilling of GPRs. The tests here assume z10 register pressure, +; without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; We need to allocate a 4-byte spill slot, rounded to 8 bytes. The frame ; size should be exactly 160 + 8 = 168. define void @f1(i32 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stmg %r6, %r15, 48(%r15) ; CHECK: aghi %r15, -168 ; CHECK-NOT: 160(%r15) @@ -50,7 +51,7 @@ define void @f1(i32 *%ptr) { ; Same for i64, except that the full spill slot is used. define void @f2(i64 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: stmg %r6, %r15, 48(%r15) ; CHECK: aghi %r15, -168 ; CHECK: stg [[REGISTER:%r[0-9]+]], 160(%r15) diff --git a/test/CodeGen/SystemZ/insert-01.ll b/test/CodeGen/SystemZ/insert-01.ll index 98ddf56959bf6..0b54e85dc4edb 100644 --- a/test/CodeGen/SystemZ/insert-01.ll +++ b/test/CodeGen/SystemZ/insert-01.ll @@ -5,7 +5,7 @@ ; Check a plain insertion with (or (and ... -0xff) (zext (load ....))). ; The whole sequence can be performed by IC. define i32 @f1(i32 %orig, i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: ni ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -18,7 +18,7 @@ define i32 @f1(i32 %orig, i8 *%ptr) { ; Like f1, but with the operands reversed. define i32 @f2(i32 %orig, i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: ni ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -32,7 +32,7 @@ define i32 @f2(i32 %orig, i8 *%ptr) { ; Check a case where more bits than lower 8 are masked out of the ; register value. 
We can use IC but must keep the original mask. define i32 @f3(i32 %orig, i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: nill %r2, 65024 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -45,7 +45,7 @@ define i32 @f3(i32 %orig, i8 *%ptr) { ; Like f3, but with the operands reversed. define i32 @f4(i32 %orig, i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: nill %r2, 65024 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -58,7 +58,7 @@ define i32 @f4(i32 %orig, i8 *%ptr) { ; Check a case where the low 8 bits are cleared by a shift left. define i32 @f5(i32 %orig, i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sll %r2, 8 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -71,7 +71,7 @@ define i32 @f5(i32 %orig, i8 *%ptr) { ; Like f5, but with the operands reversed. define i32 @f6(i32 %orig, i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sll %r2, 8 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -84,7 +84,7 @@ define i32 @f6(i32 %orig, i8 *%ptr) { ; Check insertions into a constant. define i32 @f7(i32 %orig, i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lhi %r2, 256 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -96,7 +96,7 @@ define i32 @f7(i32 %orig, i8 *%ptr) { ; Like f7, but with the operands reversed. define i32 @f8(i32 %orig, i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: lhi %r2, 256 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -108,7 +108,7 @@ define i32 @f8(i32 %orig, i8 *%ptr) { ; Check the high end of the IC range. define i32 @f9(i32 %orig, i8 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ic %r2, 4095(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4095 @@ -121,7 +121,7 @@ define i32 @f9(i32 %orig, i8 *%src) { ; Check the next byte up, which should use ICY instead of IC. define i32 @f10(i32 %orig, i8 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: icy %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4096 @@ -134,7 +134,7 @@ define i32 @f10(i32 %orig, i8 *%src) { ; Check the high end of the ICY range. 
define i32 @f11(i32 %orig, i8 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: icy %r2, 524287(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -148,7 +148,7 @@ define i32 @f11(i32 %orig, i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f12(i32 %orig, i8 *%src) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: agfi %r3, 524288 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -162,7 +162,7 @@ define i32 @f12(i32 %orig, i8 *%src) { ; Check the high end of the negative ICY range. define i32 @f13(i32 %orig, i8 *%src) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: icy %r2, -1(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -175,7 +175,7 @@ define i32 @f13(i32 %orig, i8 *%src) { ; Check the low end of the ICY range. define i32 @f14(i32 %orig, i8 *%src) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: icy %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -189,7 +189,7 @@ define i32 @f14(i32 %orig, i8 *%src) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f15(i32 %orig, i8 *%src) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: agfi %r3, -524289 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -203,7 +203,7 @@ define i32 @f15(i32 %orig, i8 *%src) { ; Check that IC allows an index. define i32 @f16(i32 %orig, i8 *%src, i64 %index) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %ptr1 = getelementptr i8 *%src, i64 %index @@ -217,7 +217,7 @@ define i32 @f16(i32 %orig, i8 *%src, i64 %index) { ; Check that ICY allows an index. 
define i32 @f17(i32 %orig, i8 *%src, i64 %index) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %ptr1 = getelementptr i8 *%src, i64 %index diff --git a/test/CodeGen/SystemZ/insert-02.ll b/test/CodeGen/SystemZ/insert-02.ll index 471889dede6a0..7a85b0bee4d84 100644 --- a/test/CodeGen/SystemZ/insert-02.ll +++ b/test/CodeGen/SystemZ/insert-02.ll @@ -5,7 +5,7 @@ ; Check a plain insertion with (or (and ... -0xff) (zext (load ....))). ; The whole sequence can be performed by IC. define i64 @f1(i64 %orig, i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: ni ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -18,7 +18,7 @@ define i64 @f1(i64 %orig, i8 *%ptr) { ; Like f1, but with the operands reversed. define i64 @f2(i64 %orig, i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: ni ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -32,7 +32,7 @@ define i64 @f2(i64 %orig, i8 *%ptr) { ; Check a case where more bits than lower 8 are masked out of the ; register value. We can use IC but must keep the original mask. define i64 @f3(i64 %orig, i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: nill %r2, 65024 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -45,7 +45,7 @@ define i64 @f3(i64 %orig, i8 *%ptr) { ; Like f3, but with the operands reversed. define i64 @f4(i64 %orig, i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: nill %r2, 65024 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -58,7 +58,7 @@ define i64 @f4(i64 %orig, i8 *%ptr) { ; Check a case where the low 8 bits are cleared by a shift left. define i64 @f5(i64 %orig, i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sllg %r2, %r2, 8 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -71,7 +71,7 @@ define i64 @f5(i64 %orig, i8 *%ptr) { ; Like f5, but with the operands reversed. 
define i64 @f6(i64 %orig, i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r2, %r2, 8 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -84,7 +84,7 @@ define i64 @f6(i64 %orig, i8 *%ptr) { ; Check insertions into a constant. define i64 @f7(i64 %orig, i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lghi %r2, 256 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -96,7 +96,7 @@ define i64 @f7(i64 %orig, i8 *%ptr) { ; Like f7, but with the operands reversed. define i64 @f8(i64 %orig, i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: lghi %r2, 256 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -108,7 +108,7 @@ define i64 @f8(i64 %orig, i8 *%ptr) { ; Check the high end of the IC range. define i64 @f9(i64 %orig, i8 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ic %r2, 4095(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4095 @@ -121,7 +121,7 @@ define i64 @f9(i64 %orig, i8 *%src) { ; Check the next byte up, which should use ICY instead of IC. define i64 @f10(i64 %orig, i8 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: icy %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4096 @@ -134,7 +134,7 @@ define i64 @f10(i64 %orig, i8 *%src) { ; Check the high end of the ICY range. define i64 @f11(i64 %orig, i8 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: icy %r2, 524287(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -148,7 +148,7 @@ define i64 @f11(i64 %orig, i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f12(i64 %orig, i8 *%src) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: agfi %r3, 524288 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -162,7 +162,7 @@ define i64 @f12(i64 %orig, i8 *%src) { ; Check the high end of the negative ICY range. 
define i64 @f13(i64 %orig, i8 *%src) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: icy %r2, -1(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -175,7 +175,7 @@ define i64 @f13(i64 %orig, i8 *%src) { ; Check the low end of the ICY range. define i64 @f14(i64 %orig, i8 *%src) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: icy %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -189,7 +189,7 @@ define i64 @f14(i64 %orig, i8 *%src) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f15(i64 %orig, i8 *%src) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: agfi %r3, -524289 ; CHECK: ic %r2, 0(%r3) ; CHECK: br %r14 @@ -203,7 +203,7 @@ define i64 @f15(i64 %orig, i8 *%src) { ; Check that IC allows an index. define i64 @f16(i64 %orig, i8 *%src, i64 %index) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %ptr1 = getelementptr i8 *%src, i64 %index @@ -217,7 +217,7 @@ define i64 @f16(i64 %orig, i8 *%src, i64 %index) { ; Check that ICY allows an index. define i64 @f17(i64 %orig, i8 *%src, i64 %index) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %ptr1 = getelementptr i8 *%src, i64 %index diff --git a/test/CodeGen/SystemZ/insert-03.ll b/test/CodeGen/SystemZ/insert-03.ll index 261eabd1be7dd..c3c1ae316c9fb 100644 --- a/test/CodeGen/SystemZ/insert-03.ll +++ b/test/CodeGen/SystemZ/insert-03.ll @@ -5,7 +5,7 @@ ; Check the lowest useful IILL value. (We use NILL rather than IILL ; to clear 16 bits.) define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: ni ; CHECK: iill %r2, 1 ; CHECK: br %r14 @@ -16,7 +16,7 @@ define i32 @f1(i32 %a) { ; Check a middle value. define i32 @f2(i32 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: ni ; CHECK: iill %r2, 32769 ; CHECK: br %r14 @@ -28,7 +28,7 @@ define i32 @f2(i32 %a) { ; Check the highest useful IILL value. 
(We use OILL rather than IILL ; to set 16 bits.) define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: ni ; CHECK: iill %r2, 65534 ; CHECK: br %r14 @@ -39,7 +39,7 @@ define i32 @f3(i32 %a) { ; Check the lowest useful IILH value. define i32 @f4(i32 %a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: ni ; CHECK: iilh %r2, 1 ; CHECK: br %r14 @@ -50,7 +50,7 @@ define i32 @f4(i32 %a) { ; Check a middle value. define i32 @f5(i32 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: ni ; CHECK: iilh %r2, 32767 ; CHECK: br %r14 @@ -61,7 +61,7 @@ define i32 @f5(i32 %a) { ; Check the highest useful IILH value. define i32 @f6(i32 %a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: ni ; CHECK: iilh %r2, 65534 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/insert-04.ll b/test/CodeGen/SystemZ/insert-04.ll index 07f88b9859eb7..5ce99dfcb7baa 100644 --- a/test/CodeGen/SystemZ/insert-04.ll +++ b/test/CodeGen/SystemZ/insert-04.ll @@ -5,7 +5,7 @@ ; Check the lowest useful IILL value. (We use NILL rather than IILL ; to clear 16 bits.) define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: ni ; CHECK: iill %r2, 1 ; CHECK: br %r14 @@ -16,7 +16,7 @@ define i64 @f1(i64 %a) { ; Check a middle value. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: ni ; CHECK: iill %r2, 32769 ; CHECK: br %r14 @@ -28,7 +28,7 @@ define i64 @f2(i64 %a) { ; Check the highest useful IILL value. (We use OILL rather than IILL ; to set 16 bits.) define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: ni ; CHECK: iill %r2, 65534 ; CHECK: br %r14 @@ -39,7 +39,7 @@ define i64 @f3(i64 %a) { ; Check the lowest useful IILH value. define i64 @f4(i64 %a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: ni ; CHECK: iilh %r2, 1 ; CHECK: br %r14 @@ -50,7 +50,7 @@ define i64 @f4(i64 %a) { ; Check a middle value. 
define i64 @f5(i64 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: ni ; CHECK: iilh %r2, 32767 ; CHECK: br %r14 @@ -61,7 +61,7 @@ define i64 @f5(i64 %a) { ; Check the highest useful IILH value. define i64 @f6(i64 %a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: ni ; CHECK: iilh %r2, 65534 ; CHECK: br %r14 @@ -72,7 +72,7 @@ define i64 @f6(i64 %a) { ; Check the lowest useful IIHL value. define i64 @f7(i64 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: ni ; CHECK: iihl %r2, 1 ; CHECK: br %r14 @@ -83,7 +83,7 @@ define i64 @f7(i64 %a) { ; Check a middle value. define i64 @f8(i64 %a) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK-NOT: ni ; CHECK: iihl %r2, 32767 ; CHECK: br %r14 @@ -94,7 +94,7 @@ define i64 @f8(i64 %a) { ; Check the highest useful IIHL value. define i64 @f9(i64 %a) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK-NOT: ni ; CHECK: iihl %r2, 65534 ; CHECK: br %r14 @@ -105,7 +105,7 @@ define i64 @f9(i64 %a) { ; Check the lowest useful IIHH value. define i64 @f10(i64 %a) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: ni ; CHECK: iihh %r2, 1 ; CHECK: br %r14 @@ -116,7 +116,7 @@ define i64 @f10(i64 %a) { ; Check a middle value. define i64 @f11(i64 %a) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK-NOT: ni ; CHECK: iihh %r2, 32767 ; CHECK: br %r14 @@ -127,7 +127,7 @@ define i64 @f11(i64 %a) { ; Check the highest useful IIHH value. define i64 @f12(i64 %a) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK-NOT: ni ; CHECK: iihh %r2, 65534 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/insert-05.ll b/test/CodeGen/SystemZ/insert-05.ll index da51676b99cfa..b76859a568f31 100644 --- a/test/CodeGen/SystemZ/insert-05.ll +++ b/test/CodeGen/SystemZ/insert-05.ll @@ -4,7 +4,7 @@ ; Prefer LHI over IILF for signed 16-bit constants. define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: ni ; CHECK: lhi %r2, 1 ; CHECK: br %r14 @@ -15,7 +15,7 @@ define i64 @f1(i64 %a) { ; Check the high end of the LHI range. 
define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: ni ; CHECK: lhi %r2, 32767 ; CHECK: br %r14 @@ -26,7 +26,7 @@ define i64 @f2(i64 %a) { ; Check the next value up, which should use IILF instead. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: ni ; CHECK: iilf %r2, 32768 ; CHECK: br %r14 @@ -37,7 +37,7 @@ define i64 @f3(i64 %a) { ; Check a value in which the lower 16 bits are clear. define i64 @f4(i64 %a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: ni ; CHECK: iilf %r2, 65536 ; CHECK: br %r14 @@ -48,7 +48,7 @@ define i64 @f4(i64 %a) { ; Check the highest useful IILF value (-0x8001). define i64 @f5(i64 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: ni ; CHECK: iilf %r2, 4294934527 ; CHECK: br %r14 @@ -59,7 +59,7 @@ define i64 @f5(i64 %a) { ; Check the next value up, which should use LHI instead. define i64 @f6(i64 %a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: ni ; CHECK: lhi %r2, -32768 ; CHECK: br %r14 @@ -71,7 +71,7 @@ define i64 @f6(i64 %a) { ; Check the highest useful LHI value. (We use OILF for -1 instead, although ; LHI might be better there too.) define i64 @f7(i64 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: ni ; CHECK: lhi %r2, -2 ; CHECK: br %r14 @@ -83,7 +83,7 @@ define i64 @f7(i64 %a) { ; Check that SRLG is still used if some of the high bits are known to be 0 ; (and so might be removed from the mask). define i64 @f8(i64 %a) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: srlg %r2, %r2, 1 ; CHECK-NEXT: iilf %r2, 32768 ; CHECK: br %r14 @@ -95,7 +95,7 @@ define i64 @f8(i64 %a) { ; Repeat f8 with addition, which is known to be equivalent to OR in this case. define i64 @f9(i64 %a) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: srlg %r2, %r2, 1 ; CHECK-NEXT: iilf %r2, 32768 ; CHECK: br %r14 @@ -107,7 +107,7 @@ define i64 @f9(i64 %a) { ; Repeat f8 with already-zero bits removed from the mask. 
define i64 @f10(i64 %a) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: srlg %r2, %r2, 1 ; CHECK-NEXT: iilf %r2, 32768 ; CHECK: br %r14 @@ -119,7 +119,7 @@ define i64 @f10(i64 %a) { ; Repeat f10 with addition, which is known to be equivalent to OR in this case. define i64 @f11(i64 %a) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: srlg %r2, %r2, 1 ; CHECK-NEXT: iilf %r2, 32768 ; CHECK: br %r14 @@ -131,7 +131,7 @@ define i64 @f11(i64 %a) { ; Check the lowest useful IIHF value. define i64 @f12(i64 %a) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK-NOT: ni ; CHECK: iihf %r2, 1 ; CHECK: br %r14 @@ -142,7 +142,7 @@ define i64 @f12(i64 %a) { ; Check a value in which the lower 16 bits are clear. define i64 @f13(i64 %a) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK-NOT: ni ; CHECK: iihf %r2, 2147483648 ; CHECK: br %r14 @@ -153,7 +153,7 @@ define i64 @f13(i64 %a) { ; Check the highest useful IIHF value (0xfffffffe). define i64 @f14(i64 %a) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK-NOT: ni ; CHECK: iihf %r2, 4294967294 ; CHECK: br %r14 @@ -165,7 +165,7 @@ define i64 @f14(i64 %a) { ; Check a case in which some of the low 32 bits are known to be clear, ; and so could be removed from the AND mask. define i64 @f15(i64 %a) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: sllg %r2, %r2, 1 ; CHECK-NEXT: iihf %r2, 1 ; CHECK: br %r14 @@ -177,7 +177,7 @@ define i64 @f15(i64 %a) { ; Repeat f15 with the zero bits explicitly removed from the mask. define i64 @f16(i64 %a) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: sllg %r2, %r2, 1 ; CHECK-NEXT: iihf %r2, 1 ; CHECK: br %r14 @@ -189,7 +189,7 @@ define i64 @f16(i64 %a) { ; Check concatenation of two i32s. define i64 @f17(i32 %a) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: msr %r2, %r2 ; CHECK-NEXT: iihf %r2, 1 ; CHECK: br %r14 @@ -201,7 +201,7 @@ define i64 @f17(i32 %a) { ; Repeat f17 with the operands reversed. 
define i64 @f18(i32 %a) { -; CHECK: f18: +; CHECK-LABEL: f18: ; CHECK: msr %r2, %r2 ; CHECK-NEXT: iihf %r2, 1 ; CHECK: br %r14 @@ -213,7 +213,7 @@ define i64 @f18(i32 %a) { ; The truncation here isn't free; we need an explicit zero extension. define i64 @f19(i32 %a) { -; CHECK: f19: +; CHECK-LABEL: f19: ; CHECK: llgcr %r2, %r2 ; CHECK: oihl %r2, 1 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/insert-06.ll b/test/CodeGen/SystemZ/insert-06.ll index 4a13ef47c8889..edcd0c5dccd2c 100644 --- a/test/CodeGen/SystemZ/insert-06.ll +++ b/test/CodeGen/SystemZ/insert-06.ll @@ -4,7 +4,7 @@ ; Insertion of an i32 can be done using LR. define i64 @f1(i64 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: {{%r[23]}} ; CHECK: lr %r2, %r3 ; CHECK: br %r14 @@ -16,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) { ; ... and again with the operands reversed. define i64 @f2(i64 %a, i32 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: {{%r[23]}} ; CHECK: lr %r2, %r3 ; CHECK: br %r14 @@ -28,7 +28,7 @@ define i64 @f2(i64 %a, i32 %b) { ; Like f1, but with "in register" zero extension. define i64 @f3(i64 %a, i64 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: {{%r[23]}} ; CHECK: lr %r2, %r3 ; CHECK: br %r14 @@ -40,7 +40,7 @@ define i64 @f3(i64 %a, i64 %b) { ; ... and again with the operands reversed. define i64 @f4(i64 %a, i64 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: {{%r[23]}} ; CHECK: lr %r2, %r3 ; CHECK: br %r14 @@ -52,7 +52,7 @@ define i64 @f4(i64 %a, i64 %b) { ; Unary operations can be done directly into the low half. define i64 @f5(i64 %a, i32 %b) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: {{%r[23]}} ; CHECK: lcr %r2, %r3 ; CHECK: br %r14 @@ -65,7 +65,7 @@ define i64 @f5(i64 %a, i32 %b) { ; ...likewise three-operand binary operations like RLL. 
define i64 @f6(i64 %a, i32 %b) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: {{%r[23]}} ; CHECK: rll %r2, %r3, 1 ; CHECK: br %r14 @@ -81,7 +81,7 @@ define i64 @f6(i64 %a, i32 %b) { ; Loads can be done directly into the low half. The range of L is checked ; in the move tests. define i64 @f7(i64 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: {{%r[23]}} ; CHECK: l %r2, 0(%r3) ; CHECK: br %r14 @@ -94,7 +94,7 @@ define i64 @f7(i64 %a, i32 *%src) { ; ...likewise extending loads. define i64 @f8(i64 %a, i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK-NOT: {{%r[23]}} ; CHECK: lb %r2, 0(%r3) ; CHECK: br %r14 @@ -110,7 +110,7 @@ define i64 @f8(i64 %a, i8 *%src) { ; that the upper half of one OR operand and the lower half of the other are ; both clear. define i64 @f9(i64 %a, i32 %b) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: sllg %r2, %r2, 32 ; CHECK: lr %r2, %r3 ; CHECK: br %r14 @@ -122,7 +122,7 @@ define i64 @f9(i64 %a, i32 %b) { ; ...and again with the operands reversed. define i64 @f10(i64 %a, i32 %b) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: sllg %r2, %r2, 32 ; CHECK: lr %r2, %r3 ; CHECK: br %r14 @@ -134,7 +134,7 @@ define i64 @f10(i64 %a, i32 %b) { ; Like f9, but with "in register" zero extension. define i64 @f11(i64 %a, i64 %b) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: lr %r2, %r3 ; CHECK: br %r14 %shift = shl i64 %a, 32 @@ -145,7 +145,7 @@ define i64 @f11(i64 %a, i64 %b) { ; ...and again with the operands reversed. define i64 @f12(i64 %a, i64 %b) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: lr %r2, %r3 ; CHECK: br %r14 %shift = shl i64 %a, 32 @@ -156,7 +156,7 @@ define i64 @f12(i64 %a, i64 %b) { ; Like f9, but for larger shifts than 32. define i64 @f13(i64 %a, i32 %b) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: sllg %r2, %r2, 60 ; CHECK: lr %r2, %r3 ; CHECK: br %r14 @@ -165,3 +165,16 @@ define i64 @f13(i64 %a, i32 %b) { %or = or i64 %shift, %low ret i64 %or } + +; We previously wrongly removed the upper AND as dead. 
+define i64 @f14(i64 %a, i64 %b) { +; CHECK-LABEL: f14: +; CHECK: risbg {{%r[0-5]}}, %r2, 6, 134, 0 +; CHECK: br %r14 + %and1 = and i64 %a, 144115188075855872 + %and2 = and i64 %b, 15 + %or = or i64 %and1, %and2 + %res = icmp eq i64 %or, 0 + %ext = sext i1 %res to i64 + ret i64 %ext +} diff --git a/test/CodeGen/SystemZ/int-abs-01.ll b/test/CodeGen/SystemZ/int-abs-01.ll new file mode 100644 index 0000000000000..40fb61192c6e2 --- /dev/null +++ b/test/CodeGen/SystemZ/int-abs-01.ll @@ -0,0 +1,83 @@ +; Test integer absolute. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test i32->i32 absolute using slt. +define i32 @f1(i32 %val) { +; CHECK-LABEL: f1: +; CHECK: lpr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp slt i32 %val, 0 + %neg = sub i32 0, %val + %res = select i1 %cmp, i32 %neg, i32 %val + ret i32 %res +} + +; Test i32->i32 absolute using sle. +define i32 @f2(i32 %val) { +; CHECK-LABEL: f2: +; CHECK: lpr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sle i32 %val, 0 + %neg = sub i32 0, %val + %res = select i1 %cmp, i32 %neg, i32 %val + ret i32 %res +} + +; Test i32->i32 absolute using sgt. +define i32 @f3(i32 %val) { +; CHECK-LABEL: f3: +; CHECK: lpr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sgt i32 %val, 0 + %neg = sub i32 0, %val + %res = select i1 %cmp, i32 %val, i32 %neg + ret i32 %res +} + +; Test i32->i32 absolute using sge. +define i32 @f4(i32 %val) { +; CHECK-LABEL: f4: +; CHECK: lpr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sge i32 %val, 0 + %neg = sub i32 0, %val + %res = select i1 %cmp, i32 %val, i32 %neg + ret i32 %res +} + +; Test i32->i64 absolute. +define i64 @f5(i32 %val) { +; CHECK-LABEL: f5: +; CHECK: lpgfr %r2, %r2 +; CHECK: br %r14 + %ext = sext i32 %val to i64 + %cmp = icmp slt i64 %ext, 0 + %neg = sub i64 0, %ext + %res = select i1 %cmp, i64 %neg, i64 %ext + ret i64 %res +} + +; Test i32->i64 absolute that uses an "in-register" form of sign extension. 
+define i64 @f6(i64 %val) { +; CHECK-LABEL: f6: +; CHECK: lpgfr %r2, %r2 +; CHECK: br %r14 + %trunc = trunc i64 %val to i32 + %ext = sext i32 %trunc to i64 + %cmp = icmp slt i64 %ext, 0 + %neg = sub i64 0, %ext + %res = select i1 %cmp, i64 %neg, i64 %ext + ret i64 %res +} + +; Test i64 absolute. +define i64 @f7(i64 %val) { +; CHECK-LABEL: f7: +; CHECK: lpgr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp slt i64 %val, 0 + %neg = sub i64 0, %val + %res = select i1 %cmp, i64 %neg, i64 %val + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-add-01.ll b/test/CodeGen/SystemZ/int-add-01.ll index d12ac229774e4..4114686e41e8d 100644 --- a/test/CodeGen/SystemZ/int-add-01.ll +++ b/test/CodeGen/SystemZ/int-add-01.ll @@ -5,7 +5,7 @@ ; Check the low end of the AH range. define i32 @f1(i32 %lhs, i16 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ah %r2, 0(%r3) ; CHECK: br %r14 %half = load i16 *%src @@ -16,7 +16,7 @@ define i32 @f1(i32 %lhs, i16 *%src) { ; Check the high end of the aligned AH range. define i32 @f2(i32 %lhs, i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ah %r2, 4094(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 2047 @@ -28,7 +28,7 @@ define i32 @f2(i32 %lhs, i16 *%src) { ; Check the next halfword up, which should use AHY instead of AH. define i32 @f3(i32 %lhs, i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ahy %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 2048 @@ -40,7 +40,7 @@ define i32 @f3(i32 %lhs, i16 *%src) { ; Check the high end of the aligned AHY range. define i32 @f4(i32 %lhs, i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ahy %r2, 524286(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 262143 @@ -53,7 +53,7 @@ define i32 @f4(i32 %lhs, i16 *%src) { ; Check the next halfword up, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i32 @f5(i32 %lhs, i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r3, 524288 ; CHECK: ah %r2, 0(%r3) ; CHECK: br %r14 @@ -66,7 +66,7 @@ define i32 @f5(i32 %lhs, i16 *%src) { ; Check the high end of the negative aligned AHY range. define i32 @f6(i32 %lhs, i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ahy %r2, -2(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -1 @@ -78,7 +78,7 @@ define i32 @f6(i32 %lhs, i16 *%src) { ; Check the low end of the AHY range. define i32 @f7(i32 %lhs, i16 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ahy %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -262144 @@ -91,7 +91,7 @@ define i32 @f7(i32 %lhs, i16 *%src) { ; Check the next halfword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f8(i32 %lhs, i16 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r3, -524290 ; CHECK: ah %r2, 0(%r3) ; CHECK: br %r14 @@ -104,7 +104,7 @@ define i32 @f8(i32 %lhs, i16 *%src) { ; Check that AH allows an index. define i32 @f9(i32 %lhs, i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ah %r2, 4094({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -118,7 +118,7 @@ define i32 @f9(i32 %lhs, i64 %src, i64 %index) { ; Check that AHY allows an index. define i32 @f10(i32 %lhs, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ahy %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/int-add-02.ll b/test/CodeGen/SystemZ/int-add-02.ll index 568ad1c4471d6..4386b5a4d496e 100644 --- a/test/CodeGen/SystemZ/int-add-02.ll +++ b/test/CodeGen/SystemZ/int-add-02.ll @@ -1,10 +1,13 @@ ; Test 32-bit addition in which the second operand is variable. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() ; Check AR. define i32 @f1(i32 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ar %r2, %r3 ; CHECK: br %r14 %add = add i32 %a, %b @@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) { ; Check the low end of the A range. define i32 @f2(i32 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: a %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) { ; Check the high end of the aligned A range. define i32 @f3(i32 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: a %r2, 4092(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1023 @@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) { ; Check the next word up, which should use AY instead of A. define i32 @f4(i32 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ay %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1024 @@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) { ; Check the high end of the aligned AY range. define i32 @f5(i32 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: ay %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f6(i32 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: a %r2, 0(%r3) ; CHECK: br %r14 @@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) { ; Check the high end of the negative aligned AY range. 
define i32 @f7(i32 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ay %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) { ; Check the low end of the AY range. define i32 @f8(i32 %a, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: ay %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f9(i32 %a, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: a %r2, 0(%r3) ; CHECK: br %r14 @@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) { ; Check that A allows an index. define i32 @f10(i32 %a, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: a %r2, 4092({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) { ; Check that AY allows an index. define i32 @f11(i32 %a, i64 %src, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: ay %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) { %add = add i32 %a, %b ret i32 %add } + +; Check that additions of spilled values can use A rather than AR. 
+define i32 @f12(i32 *%ptr0) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK: a %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %ret = call i32 @foo() + + %add0 = add i32 %ret, %val0 + %add1 = add i32 %add0, %val1 + %add2 = add i32 %add1, %val2 + %add3 = add i32 %add2, %val3 + %add4 = add i32 %add3, %val4 + %add5 = add i32 %add4, %val5 + %add6 = add i32 %add5, %val6 + %add7 = add i32 %add6, %val7 + %add8 = add i32 %add7, %val8 + %add9 = add i32 %add8, %val9 + + ret i32 %add9 +} diff --git a/test/CodeGen/SystemZ/int-add-03.ll b/test/CodeGen/SystemZ/int-add-03.ll index 46103575b7b2d..56000a80cd9b0 100644 --- a/test/CodeGen/SystemZ/int-add-03.ll +++ b/test/CodeGen/SystemZ/int-add-03.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Check AGFR. define i64 @f1(i64 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: agfr %r2, %r3 ; CHECK: br %r14 %bext = sext i32 %b to i64 @@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) { ; Check AGF with no displacement. define i64 @f2(i64 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: agf %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) { ; Check the high end of the aligned AGF range. 
define i64 @f3(i64 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: agf %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: agf %r2, 0(%r3) ; CHECK: br %r14 @@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) { ; Check the high end of the negative aligned AGF range. define i64 @f5(i64 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agf %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) { ; Check the low end of the AGF range. define i64 @f6(i64 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agf %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524292 ; CHECK: agf %r2, 0(%r3) ; CHECK: br %r14 @@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) { ; Check that AGF allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agf %r2, 524284({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %add = add i64 %a, %bext ret i64 %add } + +; Check that additions of spilled values can use AGF rather than AGFR. 
+define i64 @f9(i32 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: agf %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = sext i32 %frob0 to i64 + %ext1 = sext i32 %frob1 to i64 + %ext2 = sext i32 %frob2 to i64 + %ext3 = sext i32 %frob3 to i64 + %ext4 = sext i32 %frob4 to i64 + %ext5 = sext i32 %frob5 to i64 + %ext6 = sext i32 %frob6 to i64 + %ext7 = sext i32 %frob7 to i64 + %ext8 = sext i32 %frob8 to i64 + %ext9 = sext i32 %frob9 to i64 + + %add0 = add i64 %ret, %ext0 + %add1 = add i64 %add0, %ext1 + %add2 = add i64 %add1, %ext2 + %add3 = add i64 %add2, %ext3 + %add4 = add i64 %add3, %ext4 + %add5 = add i64 %add4, %ext5 + %add6 = add i64 %add5, %ext6 + %add7 = add i64 %add6, 
%ext7 + %add8 = add i64 %add7, %ext8 + %add9 = add i64 %add8, %ext9 + + ret i64 %add9 +} diff --git a/test/CodeGen/SystemZ/int-add-04.ll b/test/CodeGen/SystemZ/int-add-04.ll index 1c2dc76781ce9..675e36babfa7c 100644 --- a/test/CodeGen/SystemZ/int-add-04.ll +++ b/test/CodeGen/SystemZ/int-add-04.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Check ALGFR. define i64 @f1(i64 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: algfr %r2, %r3 ; CHECK: br %r14 %bext = zext i32 %b to i64 @@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) { ; Check ALGF with no displacement. define i64 @f2(i64 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: algf %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) { ; Check the high end of the aligned ALGF range. define i64 @f3(i64 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: algf %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: algf %r2, 0(%r3) ; CHECK: br %r14 @@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) { ; Check the high end of the negative aligned ALGF range. define i64 @f5(i64 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: algf %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) { ; Check the low end of the ALGF range. define i64 @f6(i64 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: algf %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. 
; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524292 ; CHECK: algf %r2, 0(%r3) ; CHECK: br %r14 @@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) { ; Check that ALGF allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: algf %r2, 524284({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %add = add i64 %a, %bext ret i64 %add } + +; Check that additions of spilled values can use ALGF rather than ALGFR. +define i64 @f9(i32 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: algf %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + 
store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = zext i32 %frob0 to i64 + %ext1 = zext i32 %frob1 to i64 + %ext2 = zext i32 %frob2 to i64 + %ext3 = zext i32 %frob3 to i64 + %ext4 = zext i32 %frob4 to i64 + %ext5 = zext i32 %frob5 to i64 + %ext6 = zext i32 %frob6 to i64 + %ext7 = zext i32 %frob7 to i64 + %ext8 = zext i32 %frob8 to i64 + %ext9 = zext i32 %frob9 to i64 + + %add0 = add i64 %ret, %ext0 + %add1 = add i64 %add0, %ext1 + %add2 = add i64 %add1, %ext2 + %add3 = add i64 %add2, %ext3 + %add4 = add i64 %add3, %ext4 + %add5 = add i64 %add4, %ext5 + %add6 = add i64 %add5, %ext6 + %add7 = add i64 %add6, %ext7 + %add8 = add i64 %add7, %ext8 + %add9 = add i64 %add8, %ext9 + + ret i64 %add9 +} diff --git a/test/CodeGen/SystemZ/int-add-05.ll b/test/CodeGen/SystemZ/int-add-05.ll index ae32cc4ad01a5..a05fdd9059c10 100644 --- a/test/CodeGen/SystemZ/int-add-05.ll +++ b/test/CodeGen/SystemZ/int-add-05.ll @@ -1,10 +1,13 @@ ; Test 64-bit addition in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo() ; Check AGR. define i64 @f1(i64 %a, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: agr %r2, %r3 ; CHECK: br %r14 %add = add i64 %a, %b @@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) { ; Check AG with no displacement. define i64 @f2(i64 %a, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ag %r2, 0(%r3) ; CHECK: br %r14 %b = load i64 *%src @@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) { ; Check the high end of the aligned AG range. define i64 @f3(i64 %a, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ag %r2, 524280(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) { ; Check the next doubleword up, which needs separate address logic. 
; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: ag %r2, 0(%r3) ; CHECK: br %r14 @@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) { ; Check the high end of the negative aligned AG range. define i64 @f5(i64 %a, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: ag %r2, -8(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) { ; Check the low end of the AG range. define i64 @f6(i64 %a, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ag %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524296 ; CHECK: ag %r2, 0(%r3) ; CHECK: br %r14 @@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) { ; Check that AG allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: ag %r2, 524280({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %add = add i64 %a, %b ret i64 %add } + +; Check that additions of spilled values can use AG rather than AGR. 
+define i64 @f9(i64 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: ag %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64 *%ptr0, i64 18 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + %val9 = load i64 *%ptr9 + + %ret = call i64 @foo() + + %add0 = add i64 %ret, %val0 + %add1 = add i64 %add0, %val1 + %add2 = add i64 %add1, %val2 + %add3 = add i64 %add2, %val3 + %add4 = add i64 %add3, %val4 + %add5 = add i64 %add4, %val5 + %add6 = add i64 %add5, %val6 + %add7 = add i64 %add6, %val7 + %add8 = add i64 %add7, %val8 + %add9 = add i64 %add8, %val9 + + ret i64 %add9 +} diff --git a/test/CodeGen/SystemZ/int-add-06.ll b/test/CodeGen/SystemZ/int-add-06.ll index 3a9c698dd241f..142c7559802e2 100644 --- a/test/CodeGen/SystemZ/int-add-06.ll +++ b/test/CodeGen/SystemZ/int-add-06.ll @@ -4,7 +4,7 @@ ; Check additions of 1. define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ahi %r2, 1 ; CHECK: br %r14 %add = add i32 %a, 1 @@ -13,7 +13,7 @@ define i32 @f1(i32 %a) { ; Check the high end of the AHI range. define i32 @f2(i32 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ahi %r2, 32767 ; CHECK: br %r14 %add = add i32 %a, 32767 @@ -22,7 +22,7 @@ define i32 @f2(i32 %a) { ; Check the next value up, which must use AFI instead. 
define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: afi %r2, 32768 ; CHECK: br %r14 %add = add i32 %a, 32768 @@ -31,7 +31,7 @@ define i32 @f3(i32 %a) { ; Check the high end of the signed 32-bit range. define i32 @f4(i32 %a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: afi %r2, 2147483647 ; CHECK: br %r14 %add = add i32 %a, 2147483647 @@ -40,7 +40,7 @@ define i32 @f4(i32 %a) { ; Check the next value up, which is treated as a negative value. define i32 @f5(i32 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: afi %r2, -2147483648 ; CHECK: br %r14 %add = add i32 %a, 2147483648 @@ -49,7 +49,7 @@ define i32 @f5(i32 %a) { ; Check the high end of the negative AHI range. define i32 @f6(i32 %a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ahi %r2, -1 ; CHECK: br %r14 %add = add i32 %a, -1 @@ -58,7 +58,7 @@ define i32 @f6(i32 %a) { ; Check the low end of the AHI range. define i32 @f7(i32 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ahi %r2, -32768 ; CHECK: br %r14 %add = add i32 %a, -32768 @@ -67,7 +67,7 @@ define i32 @f7(i32 %a) { ; Check the next value down, which must use AFI instead. define i32 @f8(i32 %a) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: afi %r2, -32769 ; CHECK: br %r14 %add = add i32 %a, -32769 @@ -76,7 +76,7 @@ define i32 @f8(i32 %a) { ; Check the low end of the signed 32-bit range. define i32 @f9(i32 %a) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: afi %r2, -2147483648 ; CHECK: br %r14 %add = add i32 %a, -2147483648 @@ -85,7 +85,7 @@ define i32 @f9(i32 %a) { ; Check the next value down, which is treated as a positive value. define i32 @f10(i32 %a) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: afi %r2, 2147483647 ; CHECK: br %r14 %add = add i32 %a, -2147483649 diff --git a/test/CodeGen/SystemZ/int-add-07.ll b/test/CodeGen/SystemZ/int-add-07.ll index a065bb2ee137f..e9e0212e4df83 100644 --- a/test/CodeGen/SystemZ/int-add-07.ll +++ b/test/CodeGen/SystemZ/int-add-07.ll @@ -4,7 +4,7 @@ ; Check additions of 1. 
define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: {{aghi %r2, 1|la %r[0-5], 1\(%r2\)}} ; CHECK: br %r14 %add = add i64 %a, 1 @@ -13,7 +13,7 @@ define i64 @f1(i64 %a) { ; Check the high end of the AGHI range. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: aghi %r2, 32767 ; CHECK: br %r14 %add = add i64 %a, 32767 @@ -22,7 +22,7 @@ define i64 @f2(i64 %a) { ; Check the next value up, which must use AGFI instead. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: {{agfi %r2, 32768|lay %r[0-5], 32768\(%r2\)}} ; CHECK: br %r14 %add = add i64 %a, 32768 @@ -31,7 +31,7 @@ define i64 @f3(i64 %a) { ; Check the high end of the AGFI range. define i64 @f4(i64 %a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r2, 2147483647 ; CHECK: br %r14 %add = add i64 %a, 2147483647 @@ -40,7 +40,7 @@ define i64 @f4(i64 %a) { ; Check the next value up, which must use ALGFI instead. define i64 @f5(i64 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: algfi %r2, 2147483648 ; CHECK: br %r14 %add = add i64 %a, 2147483648 @@ -49,7 +49,7 @@ define i64 @f5(i64 %a) { ; Check the high end of the ALGFI range. define i64 @f6(i64 %a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: algfi %r2, 4294967295 ; CHECK: br %r14 %add = add i64 %a, 4294967295 @@ -58,7 +58,7 @@ define i64 @f6(i64 %a) { ; Check the next value up, which must be loaded into a register first. define i64 @f7(i64 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llihl %r0, 1 ; CHECK: agr ; CHECK: br %r14 @@ -68,7 +68,7 @@ define i64 @f7(i64 %a) { ; Check the high end of the negative AGHI range. define i64 @f8(i64 %a) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: aghi %r2, -1 ; CHECK: br %r14 %add = add i64 %a, -1 @@ -77,7 +77,7 @@ define i64 @f8(i64 %a) { ; Check the low end of the AGHI range. 
define i64 @f9(i64 %a) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: aghi %r2, -32768 ; CHECK: br %r14 %add = add i64 %a, -32768 @@ -86,7 +86,7 @@ define i64 @f9(i64 %a) { ; Check the next value down, which must use AGFI instead. define i64 @f10(i64 %a) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: {{agfi %r2, -32769|lay %r[0-5]+, -32769\(%r2\)}} ; CHECK: br %r14 %add = add i64 %a, -32769 @@ -95,7 +95,7 @@ define i64 @f10(i64 %a) { ; Check the low end of the AGFI range. define i64 @f11(i64 %a) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r2, -2147483648 ; CHECK: br %r14 %add = add i64 %a, -2147483648 @@ -104,7 +104,7 @@ define i64 @f11(i64 %a) { ; Check the next value down, which must use SLGFI instead. define i64 @f12(i64 %a) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: slgfi %r2, 2147483649 ; CHECK: br %r14 %add = add i64 %a, -2147483649 @@ -113,7 +113,7 @@ define i64 @f12(i64 %a) { ; Check the low end of the SLGFI range. define i64 @f13(i64 %a) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: slgfi %r2, 4294967295 ; CHECK: br %r14 %add = add i64 %a, -4294967295 @@ -122,7 +122,7 @@ define i64 @f13(i64 %a) { ; Check the next value down, which must use register addition instead. define i64 @f14(i64 %a) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: llihf %r0, 4294967295 ; CHECK: agr ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/int-add-08.ll b/test/CodeGen/SystemZ/int-add-08.ll index b1f820fe3d846..bcef914ed8727 100644 --- a/test/CodeGen/SystemZ/int-add-08.ll +++ b/test/CodeGen/SystemZ/int-add-08.ll @@ -1,10 +1,13 @@ ; Test 128-bit addition in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i128 *@foo() ; Test register addition. 
define void @f1(i128 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: algr ; CHECK: alcgr ; CHECK: br %r14 @@ -17,7 +20,7 @@ define void @f1(i128 *%ptr) { ; Test memory addition with no offset. Making the load of %a volatile ; should force the memory operand to be %b. define void @f2(i128 *%aptr, i64 %addr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: alg {{%r[0-5]}}, 8(%r3) ; CHECK: alcg {{%r[0-5]}}, 0(%r3) ; CHECK: br %r14 @@ -31,7 +34,7 @@ define void @f2(i128 *%aptr, i64 %addr) { ; Test the highest aligned offset that is in range of both ALG and ALCG. define void @f3(i128 *%aptr, i64 %base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: alg {{%r[0-5]}}, 524280(%r3) ; CHECK: alcg {{%r[0-5]}}, 524272(%r3) ; CHECK: br %r14 @@ -46,7 +49,7 @@ define void @f3(i128 *%aptr, i64 %base) { ; Test the next doubleword up, which requires separate address logic for ALG. define void @f4(i128 *%aptr, i64 %base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lgr [[BASE:%r[1-5]]], %r3 ; CHECK: agfi [[BASE]], 524288 ; CHECK: alg {{%r[0-5]}}, 0([[BASE]]) @@ -65,7 +68,7 @@ define void @f4(i128 *%aptr, i64 %base) { ; both instructions. It would be better to create an anchor at 524288 ; that both instructions can use, but that isn't implemented yet. define void @f5(i128 *%aptr, i64 %base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: alg {{%r[0-5]}}, 0({{%r[1-5]}}) ; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}}) ; CHECK: br %r14 @@ -80,7 +83,7 @@ define void @f5(i128 *%aptr, i64 %base) { ; Test the lowest displacement that is in range of both ALG and ALCG. define void @f6(i128 *%aptr, i64 %base) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: alg {{%r[0-5]}}, -524280(%r3) ; CHECK: alcg {{%r[0-5]}}, -524288(%r3) ; CHECK: br %r14 @@ -95,7 +98,7 @@ define void @f6(i128 *%aptr, i64 %base) { ; Test the next doubleword down, which is out of range of the ALCG. 
define void @f7(i128 *%aptr, i64 %base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: alg {{%r[0-5]}}, -524288(%r3) ; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}}) ; CHECK: br %r14 @@ -108,3 +111,34 @@ define void @f7(i128 *%aptr, i64 %base) { ret void } +; Check that additions of spilled values can use ALG and ALCG rather than +; ALGR and ALCGR. +define void @f8(i128 *%ptr0) { +; CHECK-LABEL: f8: +; CHECK: brasl %r14, foo@PLT +; CHECK: alg {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: alcg {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i128 *%ptr0, i128 2 + %ptr2 = getelementptr i128 *%ptr0, i128 4 + %ptr3 = getelementptr i128 *%ptr0, i128 6 + %ptr4 = getelementptr i128 *%ptr0, i128 8 + + %val0 = load i128 *%ptr0 + %val1 = load i128 *%ptr1 + %val2 = load i128 *%ptr2 + %val3 = load i128 *%ptr3 + %val4 = load i128 *%ptr4 + + %retptr = call i128 *@foo() + + %ret = load i128 *%retptr + %add0 = add i128 %ret, %val0 + %add1 = add i128 %add0, %val1 + %add2 = add i128 %add1, %val2 + %add3 = add i128 %add2, %val3 + %add4 = add i128 %add3, %val4 + store i128 %add4, i128 *%retptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-add-09.ll b/test/CodeGen/SystemZ/int-add-09.ll index bfe63389f1893..fd151a7f979a1 100644 --- a/test/CodeGen/SystemZ/int-add-09.ll +++ b/test/CodeGen/SystemZ/int-add-09.ll @@ -1,13 +1,13 @@ ; Test 128-bit addition in which the second operand is constant. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check additions of 1. The XOR ensures that we don't instead load the ; constant into a register and use memory addition. define void @f1(i128 *%aptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: algfi {{%r[0-5]}}, 1 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 128 @@ -18,9 +18,9 @@ define void @f1(i128 *%aptr) { ; Check the high end of the ALGFI range. 
define void @f2(i128 *%aptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: algfi {{%r[0-5]}}, 4294967295 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 128 @@ -31,9 +31,9 @@ define void @f2(i128 *%aptr) { ; Check the next value up, which must use register addition. define void @f3(i128 *%aptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: algr -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 128 @@ -44,9 +44,9 @@ define void @f3(i128 *%aptr) { ; Check addition of -1, which must also use register addition. define void @f4(i128 *%aptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: algr -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 128 diff --git a/test/CodeGen/SystemZ/int-add-10.ll b/test/CodeGen/SystemZ/int-add-10.ll index 17cfdbe337717..01d0a661ed293 100644 --- a/test/CodeGen/SystemZ/int-add-10.ll +++ b/test/CodeGen/SystemZ/int-add-10.ll @@ -5,9 +5,9 @@ ; Check register additions. The XOR ensures that we don't instead zero-extend ; %b into a register and use memory addition. define void @f1(i128 *%aptr, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: algfr {{%r[0-5]}}, %r3 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -19,9 +19,9 @@ define void @f1(i128 *%aptr, i32 %b) { ; Like f1, but using an "in-register" extension. define void @f2(i128 *%aptr, i64 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: algfr {{%r[0-5]}}, %r3 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -35,9 +35,9 @@ define void @f2(i128 *%aptr, i64 %b) { ; Test register addition in cases where the second operand is zero extended ; from i64 rather than i32, but is later masked to i32 range. 
define void @f3(i128 *%aptr, i64 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: algfr {{%r[0-5]}}, %r3 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -50,9 +50,9 @@ define void @f3(i128 *%aptr, i64 %b) { ; Test ALGF with no offset. define void @f4(i128 *%aptr, i32 *%bsrc) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: algf {{%r[0-5]}}, 0(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -65,9 +65,9 @@ define void @f4(i128 *%aptr, i32 *%bsrc) { ; Check the high end of the ALGF range. define void @f5(i128 *%aptr, i32 *%bsrc) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: algf {{%r[0-5]}}, 524284(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -82,10 +82,10 @@ define void @f5(i128 *%aptr, i32 *%bsrc) { ; Check the next word up, which must use separate address logic. ; Other sequences besides this one would be OK. define void @f6(i128 *%aptr, i32 *%bsrc) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: algf {{%r[0-5]}}, 0(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -99,9 +99,9 @@ define void @f6(i128 *%aptr, i32 *%bsrc) { ; Check the high end of the negative aligned ALGF range. define void @f7(i128 *%aptr, i32 *%bsrc) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: algf {{%r[0-5]}}, -4(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -115,9 +115,9 @@ define void @f7(i128 *%aptr, i32 *%bsrc) { ; Check the low end of the ALGF range. define void @f8(i128 *%aptr, i32 *%bsrc) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: algf {{%r[0-5]}}, -524288(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -132,10 +132,10 @@ define void @f8(i128 *%aptr, i32 *%bsrc) { ; Check the next word down, which needs separate address logic. 
; Other sequences besides this one would be OK. define void @f9(i128 *%aptr, i32 *%bsrc) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: algf {{%r[0-5]}}, 0(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -149,7 +149,7 @@ define void @f9(i128 *%aptr, i32 *%bsrc) { ; Check that ALGF allows an index. define void @f10(i128 *%aptr, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: algf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %a = load i128 *%aptr diff --git a/test/CodeGen/SystemZ/int-add-11.ll b/test/CodeGen/SystemZ/int-add-11.ll index 47a776ecf6ece..679c206094f3b 100644 --- a/test/CodeGen/SystemZ/int-add-11.ll +++ b/test/CodeGen/SystemZ/int-add-11.ll @@ -1,10 +1,11 @@ -; Test 32-bit additions of constants to memory. +; Test 32-bit additions of constants to memory. The tests here +; assume z10 register pressure, without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check additions of 1. define void @f1(i32 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: asi 0(%r2), 1 ; CHECK: br %r14 %val = load i32 *%ptr @@ -15,7 +16,7 @@ define void @f1(i32 *%ptr) { ; Check the high end of the constant range. define void @f2(i32 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: asi 0(%r2), 127 ; CHECK: br %r14 %val = load i32 *%ptr @@ -27,7 +28,7 @@ define void @f2(i32 *%ptr) { ; Check the next constant up, which must use an addition and a store. ; Both L/AHI and LHI/A would be OK. define void @f3(i32 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: asi ; CHECK: st %r0, 0(%r2) ; CHECK: br %r14 @@ -39,7 +40,7 @@ define void @f3(i32 *%ptr) { ; Check the low end of the constant range. 
define void @f4(i32 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: asi 0(%r2), -128 ; CHECK: br %r14 %val = load i32 *%ptr @@ -50,7 +51,7 @@ define void @f4(i32 *%ptr) { ; Check the next value down, with the same comment as f3. define void @f5(i32 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: asi ; CHECK: st %r0, 0(%r2) ; CHECK: br %r14 @@ -62,7 +63,7 @@ define void @f5(i32 *%ptr) { ; Check the high end of the aligned ASI range. define void @f6(i32 *%base) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: asi 524284(%r2), 1 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131071 @@ -75,7 +76,7 @@ define void @f6(i32 *%base) { ; Check the next word up, which must use separate address logic. ; Other sequences besides this one would be OK. define void @f7(i32 *%base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r2, 524288 ; CHECK: asi 0(%r2), 1 ; CHECK: br %r14 @@ -88,7 +89,7 @@ define void @f7(i32 *%base) { ; Check the low end of the ASI range. define void @f8(i32 *%base) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: asi -524288(%r2), 1 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131072 @@ -101,7 +102,7 @@ define void @f8(i32 *%base) { ; Check the next word down, which must use separate address logic. ; Other sequences besides this one would be OK. define void @f9(i32 *%base) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r2, -524292 ; CHECK: asi 0(%r2), 1 ; CHECK: br %r14 @@ -114,7 +115,7 @@ define void @f9(i32 *%base) { ; Check that ASI does not allow indices. define void @f10(i64 %base, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agr %r2, %r3 ; CHECK: asi 4(%r2), 1 ; CHECK: br %r14 @@ -126,3 +127,169 @@ define void @f10(i64 %base, i64 %index) { store i32 %add, i32 *%ptr ret void } + +; Check that adding 127 to a spilled value can use ASI. 
+define void @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: asi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %add0 = add i32 %val0, 127 + %add1 = add i32 %val1, 127 + %add2 = add i32 %val2, 127 + %add3 = add i32 %val3, 127 + %add4 = add i32 %val4, 127 + %add5 = add i32 %val5, 127 + %add6 = add i32 %val6, 127 + %add7 = add i32 %val7, 127 + %add8 = add i32 %val8, 127 + %add9 = add i32 %val9, 127 + %add10 = add i32 %val10, 127 + %add11 = add i32 %val11, 127 + %add12 = add i32 %val12, 127 + %add13 = add i32 %val13, 127 + %add14 = add i32 %val14, 127 + %add15 = add i32 %val15, 127 + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ 
%add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret void +} + +; Check that adding -128 to a spilled value can use ASI. +define void @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: asi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %add0 = add i32 %val0, -128 + %add1 = add i32 %val1, -128 + %add2 = add i32 %val2, -128 + %add3 = add i32 %val3, -128 + %add4 = add i32 %val4, -128 + %add5 = add i32 %val5, -128 + %add6 = add i32 %val6, -128 + %add7 = add i32 %val7, -128 + %add8 = add i32 %val8, -128 + %add9 = add i32 %val9, -128 + %add10 = add i32 %val10, -128 + %add11 = add i32 %val11, -128 + %add12 = add i32 %val12, -128 + %add13 = add i32 %val13, 
-128 + %add14 = add i32 %val14, -128 + %add15 = add i32 %val15, -128 + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-add-12.ll b/test/CodeGen/SystemZ/int-add-12.ll index ae1c1f735fa76..741cce19d72c3 100644 --- a/test/CodeGen/SystemZ/int-add-12.ll +++ b/test/CodeGen/SystemZ/int-add-12.ll @@ -4,7 +4,7 @@ ; Check additions of 1. 
define void @f1(i64 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: agsi 0(%r2), 1 ; CHECK: br %r14 %val = load i64 *%ptr @@ -15,7 +15,7 @@ define void @f1(i64 *%ptr) { ; Check the high end of the constant range. define void @f2(i64 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: agsi 0(%r2), 127 ; CHECK: br %r14 %val = load i64 *%ptr @@ -27,7 +27,7 @@ define void @f2(i64 *%ptr) { ; Check the next constant up, which must use an addition and a store. ; Both LG/AGHI and LGHI/AG would be OK. define void @f3(i64 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: agsi ; CHECK: stg %r0, 0(%r2) ; CHECK: br %r14 @@ -39,7 +39,7 @@ define void @f3(i64 *%ptr) { ; Check the low end of the constant range. define void @f4(i64 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agsi 0(%r2), -128 ; CHECK: br %r14 %val = load i64 *%ptr @@ -50,7 +50,7 @@ define void @f4(i64 *%ptr) { ; Check the next value down, with the same comment as f3. define void @f5(i64 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: agsi ; CHECK: stg %r0, 0(%r2) ; CHECK: br %r14 @@ -62,7 +62,7 @@ define void @f5(i64 *%ptr) { ; Check the high end of the aligned AGSI range. define void @f6(i64 *%base) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agsi 524280(%r2), 1 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 65535 @@ -75,7 +75,7 @@ define void @f6(i64 *%base) { ; Check the next doubleword up, which must use separate address logic. ; Other sequences besides this one would be OK. define void @f7(i64 *%base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r2, 524288 ; CHECK: agsi 0(%r2), 1 ; CHECK: br %r14 @@ -88,7 +88,7 @@ define void @f7(i64 *%base) { ; Check the low end of the AGSI range. define void @f8(i64 *%base) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agsi -524288(%r2), 1 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -65536 @@ -101,7 +101,7 @@ define void @f8(i64 *%base) { ; Check the next doubleword down, which must use separate address logic. 
; Other sequences besides this one would be OK. define void @f9(i64 *%base) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r2, -524296 ; CHECK: agsi 0(%r2), 1 ; CHECK: br %r14 @@ -114,7 +114,7 @@ define void @f9(i64 *%base) { ; Check that AGSI does not allow indices. define void @f10(i64 %base, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agr %r2, %r3 ; CHECK: agsi 8(%r2), 1 ; CHECK: br %r14 @@ -126,3 +126,169 @@ define void @f10(i64 %base, i64 %index) { store i64 %add, i64 *%ptr ret void } + +; Check that adding 127 to a spilled value can use AGSI. +define void @f11(i64 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: agsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i64 *%ptr + %val1 = load volatile i64 *%ptr + %val2 = load volatile i64 *%ptr + %val3 = load volatile i64 *%ptr + %val4 = load volatile i64 *%ptr + %val5 = load volatile i64 *%ptr + %val6 = load volatile i64 *%ptr + %val7 = load volatile i64 *%ptr + %val8 = load volatile i64 *%ptr + %val9 = load volatile i64 *%ptr + %val10 = load volatile i64 *%ptr + %val11 = load volatile i64 *%ptr + %val12 = load volatile i64 *%ptr + %val13 = load volatile i64 *%ptr + %val14 = load volatile i64 *%ptr + %val15 = load volatile i64 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %add0 = add i64 %val0, 127 + %add1 = add i64 %val1, 127 + %add2 = add i64 %val2, 127 + %add3 = add i64 %val3, 127 + %add4 = add i64 %val4, 127 + %add5 = add i64 %val5, 127 + %add6 = add i64 %val6, 127 + %add7 = add i64 %val7, 127 + %add8 = add i64 %val8, 127 + %add9 = add i64 %val9, 127 + %add10 = add i64 %val10, 127 + %add11 = add i64 %val11, 127 + %add12 = add i64 %val12, 127 + %add13 = add i64 %val13, 127 + %add14 = add i64 %val14, 127 + %add15 = add i64 %val15, 127 + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 
= phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret void +} + +; Check that adding -128 to a spilled value can use AGSI. 
+define void @f12(i64 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: agsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i64 *%ptr + %val1 = load volatile i64 *%ptr + %val2 = load volatile i64 *%ptr + %val3 = load volatile i64 *%ptr + %val4 = load volatile i64 *%ptr + %val5 = load volatile i64 *%ptr + %val6 = load volatile i64 *%ptr + %val7 = load volatile i64 *%ptr + %val8 = load volatile i64 *%ptr + %val9 = load volatile i64 *%ptr + %val10 = load volatile i64 *%ptr + %val11 = load volatile i64 *%ptr + %val12 = load volatile i64 *%ptr + %val13 = load volatile i64 *%ptr + %val14 = load volatile i64 *%ptr + %val15 = load volatile i64 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %add0 = add i64 %val0, -128 + %add1 = add i64 %val1, -128 + %add2 = add i64 %val2, -128 + %add3 = add i64 %val3, -128 + %add4 = add i64 %val4, -128 + %add5 = add i64 %val5, -128 + %add6 = add i64 %val6, -128 + %add7 = add i64 %val7, -128 + %add8 = add i64 %val8, -128 + %add9 = add i64 %val9, -128 + %add10 = add i64 %val10, -128 + %add11 = add i64 %val11, -128 + %add12 = add i64 %val12, -128 + %add13 = add i64 %val13, -128 + %add14 = add i64 %val14, -128 + %add15 = add i64 %val15, -128 + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ 
%val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-add-13.ll b/test/CodeGen/SystemZ/int-add-13.ll new file mode 100644 index 0000000000000..7dfabbcc59e01 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-13.ll @@ -0,0 +1,39 @@ +; Test the three-operand forms of addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check ARK. +define i32 @f1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f1: +; CHECK: ark %r2, %r3, %r4 +; CHECK: br %r14 + %add = add i32 %b, %c + ret i32 %add +} + +; Check that we can still use AR in obvious cases. +define i32 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: ar %r2, %r3 +; CHECK: br %r14 + %add = add i32 %a, %b + ret i32 %add +} + +; Check AGRK. +define i64 @f3(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: f3: +; CHECK: agrk %r2, %r3, %r4 +; CHECK: br %r14 + %add = add i64 %b, %c + ret i64 %add +} + +; Check that we can still use AGR in obvious cases. 
+define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: agr %r2, %r3 +; CHECK: br %r14 + %add = add i64 %a, %b + ret i64 %add +} diff --git a/test/CodeGen/SystemZ/int-add-14.ll b/test/CodeGen/SystemZ/int-add-14.ll new file mode 100644 index 0000000000000..07323789c3b07 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-14.ll @@ -0,0 +1,67 @@ +; Test 32-bit addition in which the second operand is constant and in which +; three-operand forms are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check additions of 1. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: ahik %r2, %r3, 1 +; CHECK: br %r14 + %add = add i32 %b, 1 + ret i32 %add +} + +; Check the high end of the AHIK range. +define i32 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: ahik %r2, %r3, 32767 +; CHECK: br %r14 + %add = add i32 %b, 32767 + ret i32 %add +} + +; Check the next value up, which must use AFI instead. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: afi {{%r[0-5]}}, 32768 +; CHECK: br %r14 + %add = add i32 %b, 32768 + ret i32 %add +} + +; Check the high end of the negative AHIK range. +define i32 @f4(i32 %a, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: ahik %r2, %r3, -1 +; CHECK: br %r14 + %add = add i32 %b, -1 + ret i32 %add +} + +; Check the low end of the AHIK range. +define i32 @f5(i32 %a, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: ahik %r2, %r3, -32768 +; CHECK: br %r14 + %add = add i32 %b, -32768 + ret i32 %add +} + +; Check the next value down, which must use AFI instead. +define i32 @f6(i32 %a, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: afi {{%r[0-5]}}, -32769 +; CHECK: br %r14 + %add = add i32 %b, -32769 + ret i32 %add +} + +; Check that AHI is still used in obvious cases. 
+define i32 @f7(i32 %a) { +; CHECK-LABEL: f7: +; CHECK: ahi %r2, 1 +; CHECK: br %r14 + %add = add i32 %a, 1 + ret i32 %add +} diff --git a/test/CodeGen/SystemZ/int-add-15.ll b/test/CodeGen/SystemZ/int-add-15.ll new file mode 100644 index 0000000000000..041ec19142dce --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-15.ll @@ -0,0 +1,67 @@ +; Test 64-bit addition in which the second operand is constant and in which +; three-operand forms are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check additions of 1. +define i64 @f1(i64 %a, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: {{aghik %r2, %r3, 1|la %r2, 1\(%r3\)}} +; CHECK: br %r14 + %add = add i64 %b, 1 + ret i64 %add +} + +; Check the high end of the AGHIK range. +define i64 @f2(i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: aghik %r2, %r3, 32767 +; CHECK: br %r14 + %add = add i64 %b, 32767 + ret i64 %add +} + +; Check the next value up, which must use AGFI instead. +define i64 @f3(i64 %a, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: {{agfi %r[0-5], 32768|lay %r2, 32768\(%r3\)}} +; CHECK: br %r14 + %add = add i64 %b, 32768 + ret i64 %add +} + +; Check the high end of the negative AGHIK range. +define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: aghik %r2, %r3, -1 +; CHECK: br %r14 + %add = add i64 %b, -1 + ret i64 %add +} + +; Check the low end of the AGHIK range. +define i64 @f5(i64 %a, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: aghik %r2, %r3, -32768 +; CHECK: br %r14 + %add = add i64 %b, -32768 + ret i64 %add +} + +; Check the next value down, which must use AGFI instead. +define i64 @f6(i64 %a, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: {{agfi %r[0-5], -32769|lay %r2, -32769\(%r3\)}} +; CHECK: br %r14 + %add = add i64 %b, -32769 + ret i64 %add +} + +; Check that AGHI is still used in obvious cases. 
+define i64 @f7(i64 %a) { +; CHECK-LABEL: f7: +; CHECK: aghi %r2, 32000 +; CHECK: br %r14 + %add = add i64 %a, 32000 + ret i64 %add +} diff --git a/test/CodeGen/SystemZ/int-add-16.ll b/test/CodeGen/SystemZ/int-add-16.ll new file mode 100644 index 0000000000000..36cc13e5fc6e7 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-16.ll @@ -0,0 +1,93 @@ +; Test 128-bit addition when the distinct-operands facility is available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test the case where both operands are in registers. +define i64 @f1(i64 %a, i64 %b, i64 %c, i64 %d, i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: algrk %r2, %r4, %r5 +; CHECK: alcgr +; CHECK: br %r14 + %x1 = insertelement <2 x i64> undef, i64 %b, i32 0 + %x2 = insertelement <2 x i64> %x1, i64 %c, i32 1 + %x = bitcast <2 x i64> %x2 to i128 + %y2 = insertelement <2 x i64> %x1, i64 %d, i32 1 + %y = bitcast <2 x i64> %y2 to i128 + %add = add i128 %x, %y + %addv = bitcast i128 %add to <2 x i64> + %high = extractelement <2 x i64> %addv, i32 0 + store i64 %high, i64 *%ptr + %low = extractelement <2 x i64> %addv, i32 1 + ret i64 %low +} + +; Test addition of 1. +define void @f2(i64 %a, i64 %b, i128 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: alghsik {{%r[0-5]}}, %r3, 1 +; CHECK: alcgr +; CHECK: br %r14 + %x1 = insertelement <2 x i64> undef, i64 %a, i32 0 + %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1 + %x = bitcast <2 x i64> %x2 to i128 + %add = add i128 %x, 1 + store i128 %add, i128 *%ptr + ret void +} + +; Test the upper end of the ALGHSIK range. +define void @f3(i64 %a, i64 %b, i128 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: alghsik {{%r[0-5]}}, %r3, 32767 +; CHECK: alcgr +; CHECK: br %r14 + %x1 = insertelement <2 x i64> undef, i64 %a, i32 0 + %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1 + %x = bitcast <2 x i64> %x2 to i128 + %add = add i128 %x, 32767 + store i128 %add, i128 *%ptr + ret void +} + +; Test the next value up, which should use ALGFI instead. 
+define void @f4(i64 %a, i64 %b, i128 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: algfi %r3, 32768 +; CHECK: alcgr +; CHECK: br %r14 + %x1 = insertelement <2 x i64> undef, i64 %a, i32 0 + %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1 + %x = bitcast <2 x i64> %x2 to i128 + %add = add i128 %x, 32768 + store i128 %add, i128 *%ptr + ret void +} + +; Test the lower end of the ALGHSIK range. +define void @f5(i64 %a, i64 %b, i128 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: alghsik {{%r[0-5]}}, %r3, -32768 +; CHECK: alcgr +; CHECK: br %r14 + %x1 = insertelement <2 x i64> undef, i64 %a, i32 0 + %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1 + %x = bitcast <2 x i64> %x2 to i128 + %add = add i128 %x, -32768 + store i128 %add, i128 *%ptr + ret void +} + +; Test the next value down, which cannot use either ALGHSIK or ALGFI. +define void @f6(i64 %a, i64 %b, i128 *%ptr) { +; CHECK-LABEL: f6: +; CHECK-NOT: alghsik +; CHECK-NOT: algfi +; CHECK: alcgr +; CHECK: br %r14 + %x1 = insertelement <2 x i64> undef, i64 %a, i32 0 + %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1 + %x = bitcast <2 x i64> %x2 to i128 + %add = add i128 %x, -32769 + store i128 %add, i128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-cmp-01.ll b/test/CodeGen/SystemZ/int-cmp-01.ll index aa432f0b04fba..6653b6f706f7c 100644 --- a/test/CodeGen/SystemZ/int-cmp-01.ll +++ b/test/CodeGen/SystemZ/int-cmp-01.ll @@ -5,7 +5,7 @@ ; Check the low end of the CH range. define void @f1(i32 %lhs, i16 *%src, i32 *%dst) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ch %r2, 0(%r3) ; CHECK: br %r14 %half = load i16 *%src @@ -18,7 +18,7 @@ define void @f1(i32 %lhs, i16 *%src, i32 *%dst) { ; Check the high end of the aligned CH range. 
define void @f2(i32 %lhs, i16 *%src, i32 *%dst) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ch %r2, 4094(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 2047 @@ -32,7 +32,7 @@ define void @f2(i32 %lhs, i16 *%src, i32 *%dst) { ; Check the next halfword up, which should use CHY instead of CH. define void @f3(i32 %lhs, i16 *%src, i32 *%dst) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: chy %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 2048 @@ -46,7 +46,7 @@ define void @f3(i32 %lhs, i16 *%src, i32 *%dst) { ; Check the high end of the aligned CHY range. define void @f4(i32 %lhs, i16 *%src, i32 *%dst) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: chy %r2, 524286(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 262143 @@ -61,7 +61,7 @@ define void @f4(i32 %lhs, i16 *%src, i32 *%dst) { ; Check the next halfword up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f5(i32 %lhs, i16 *%src, i32 *%dst) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r3, 524288 ; CHECK: ch %r2, 0(%r3) ; CHECK: br %r14 @@ -76,7 +76,7 @@ define void @f5(i32 %lhs, i16 *%src, i32 *%dst) { ; Check the high end of the negative aligned CHY range. define void @f6(i32 %lhs, i16 *%src, i32 *%dst) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: chy %r2, -2(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -1 @@ -90,7 +90,7 @@ define void @f6(i32 %lhs, i16 *%src, i32 *%dst) { ; Check the low end of the CHY range. define void @f7(i32 %lhs, i16 *%src, i32 *%dst) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: chy %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -262144 @@ -105,7 +105,7 @@ define void @f7(i32 %lhs, i16 *%src, i32 *%dst) { ; Check the next halfword down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define void @f8(i32 %lhs, i16 *%src, i32 *%dst) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r3, -524290 ; CHECK: ch %r2, 0(%r3) ; CHECK: br %r14 @@ -120,7 +120,7 @@ define void @f8(i32 %lhs, i16 *%src, i32 *%dst) { ; Check that CH allows an index. define void @f9(i32 %lhs, i64 %base, i64 %index, i32 *%dst) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ch %r2, 4094({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -136,7 +136,7 @@ define void @f9(i32 %lhs, i64 %base, i64 %index, i32 *%dst) { ; Check that CHY allows an index. define void @f10(i32 %lhs, i64 %base, i64 %index, i32 *%dst) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: chy %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -149,3 +149,17 @@ define void @f10(i32 %lhs, i64 %base, i64 %index, i32 *%dst) { store i32 %res, i32 *%dst ret void } + +; Check the comparison can be reversed if that allows CH to be used. +define double @f11(double %a, double %b, i32 %rhs, i16 *%src) { +; CHECK-LABEL: f11: +; CHECK: ch %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %half = load i16 *%src + %lhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-02.ll b/test/CodeGen/SystemZ/int-cmp-02.ll index c158fb4af77f9..4a8a1a9ade33d 100644 --- a/test/CodeGen/SystemZ/int-cmp-02.ll +++ b/test/CodeGen/SystemZ/int-cmp-02.ll @@ -2,11 +2,12 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i32 @foo() + ; Check register comparison. define double @f1(double %a, double %b, i32 %i1, i32 %i2) { -; CHECK: f1: -; CHECK: cr %r2, %r3 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f1: +; CHECK: crjl %r2, %r3 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i32 %i1, %i2 @@ -16,9 +17,9 @@ define double @f1(double %a, double %b, i32 %i1, i32 %i2) { ; Check the low end of the C range. 
define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: c %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = load i32 *%ptr @@ -29,9 +30,9 @@ define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) { ; Check the high end of the aligned C range. define double @f3(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: c %r2, 4092(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1023 @@ -43,9 +44,9 @@ define double @f3(double %a, double %b, i32 %i1, i32 *%base) { ; Check the next word up, which should use CY instead of C. define double @f4(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: cy %r2, 4096(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1024 @@ -57,9 +58,9 @@ define double @f4(double %a, double %b, i32 %i1, i32 *%base) { ; Check the high end of the aligned CY range. define double @f5(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cy %r2, 524284(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131071 @@ -72,10 +73,10 @@ define double @f5(double %a, double %b, i32 %i1, i32 *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f6(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: c %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131072 @@ -87,9 +88,9 @@ define double @f6(double %a, double %b, i32 %i1, i32 *%base) { ; Check the high end of the negative aligned CY range. 
define double @f7(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: cy %r2, -4(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -1 @@ -101,9 +102,9 @@ define double @f7(double %a, double %b, i32 %i1, i32 *%base) { ; Check the low end of the CY range. define double @f8(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cy %r2, -524288(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131072 @@ -116,10 +117,10 @@ define double @f8(double %a, double %b, i32 %i1, i32 *%base) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f9(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: c %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131073 @@ -131,9 +132,9 @@ define double @f9(double %a, double %b, i32 %i1, i32 *%base) { ; Check that C allows an index. define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: c %r2, 4092({{%r4,%r3|%r3,%r4}}) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -147,9 +148,9 @@ define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) { ; Check that CY allows an index. 
define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: cy %r2, 4096({{%r4,%r3|%r3,%r4}}) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -160,3 +161,36 @@ define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) { %res = select i1 %cond, double %a, double %b ret double %res } + +; The first branch here got recreated by InsertBranch while splitting the +; critical edge %entry->%while.body, which lost the kills information for CC. +define void @f12(i32 %a, i32 %b) { +; CHECK-LABEL: f12: +; CHECK: cije %r2, 0 +; CHECK: crjlh %r2, +; CHECK: br %r14 +entry: + %cmp11 = icmp eq i32 %a, 0 + br i1 %cmp11, label %while.end, label %while.body + +while.body: + %c = call i32 @foo() + %cmp12 = icmp eq i32 %c, %b + br i1 %cmp12, label %while.end, label %while.body + +while.end: + ret void +} + +; Check the comparison can be reversed if that allows C to be used. +define double @f13(double %a, double %b, i32 %i2, i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: c %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i1 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-03.ll b/test/CodeGen/SystemZ/int-cmp-03.ll index 4203bee6ac440..aa654e086dc60 100644 --- a/test/CodeGen/SystemZ/int-cmp-03.ll +++ b/test/CodeGen/SystemZ/int-cmp-03.ll @@ -4,9 +4,8 @@ ; Check register comparison. define double @f1(double %a, double %b, i32 %i1, i32 %i2) { -; CHECK: f1: -; CHECK: clr %r2, %r3 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f1: +; CHECK: clrjl %r2, %r3 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ult i32 %i1, %i2 @@ -16,9 +15,9 @@ define double @f1(double %a, double %b, i32 %i1, i32 %i2) { ; Check the low end of the CL range. 
define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cl %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = load i32 *%ptr @@ -29,9 +28,9 @@ define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) { ; Check the high end of the aligned CL range. define double @f3(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cl %r2, 4092(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1023 @@ -43,9 +42,9 @@ define double @f3(double %a, double %b, i32 %i1, i32 *%base) { ; Check the next word up, which should use CLY instead of CL. define double @f4(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: cly %r2, 4096(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1024 @@ -57,9 +56,9 @@ define double @f4(double %a, double %b, i32 %i1, i32 *%base) { ; Check the high end of the aligned CLY range. define double @f5(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cly %r2, 524284(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131071 @@ -72,10 +71,10 @@ define double @f5(double %a, double %b, i32 %i1, i32 *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f6(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: cl %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131072 @@ -87,9 +86,9 @@ define double @f6(double %a, double %b, i32 %i1, i32 *%base) { ; Check the high end of the negative aligned CLY range. 
define double @f7(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: cly %r2, -4(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -1 @@ -101,9 +100,9 @@ define double @f7(double %a, double %b, i32 %i1, i32 *%base) { ; Check the low end of the CLY range. define double @f8(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cly %r2, -524288(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131072 @@ -116,10 +115,10 @@ define double @f8(double %a, double %b, i32 %i1, i32 *%base) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f9(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: cl %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131073 @@ -131,9 +130,9 @@ define double @f9(double %a, double %b, i32 %i1, i32 *%base) { ; Check that CL allows an index. define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: cl %r2, 4092({{%r4,%r3|%r3,%r4}}) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -147,9 +146,9 @@ define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) { ; Check that CLY allows an index. 
define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: cly %r2, 4096({{%r4,%r3|%r3,%r4}}) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -160,3 +159,16 @@ define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) { %res = select i1 %cond, double %a, double %b ret double %res } + +; Check the comparison can be reversed if that allows CL to be used. +define double @f12(double %a, double %b, i32 %i2, i32 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: cl %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i1 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-04.ll b/test/CodeGen/SystemZ/int-cmp-04.ll index d0625fbddbae9..a6606f3929239 100644 --- a/test/CodeGen/SystemZ/int-cmp-04.ll +++ b/test/CodeGen/SystemZ/int-cmp-04.ll @@ -5,7 +5,7 @@ ; Check CGH with no displacement. define void @f1(i64 %lhs, i16 *%src, i64 *%dst) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cgh %r2, 0(%r3) ; CHECK: br %r14 %half = load i16 *%src @@ -18,7 +18,7 @@ define void @f1(i64 %lhs, i16 *%src, i64 *%dst) { ; Check the high end of the aligned CGH range. define void @f2(i64 %lhs, i16 *%src, i64 *%dst) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cgh %r2, 524286(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 262143 @@ -33,7 +33,7 @@ define void @f2(i64 %lhs, i16 *%src, i64 *%dst) { ; Check the next halfword up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f3(i64 %lhs, i16 *%src, i64 *%dst) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: agfi %r3, 524288 ; CHECK: cgh %r2, 0(%r3) ; CHECK: br %r14 @@ -48,7 +48,7 @@ define void @f3(i64 %lhs, i16 *%src, i64 *%dst) { ; Check the high end of the negative aligned CGH range. 
define void @f4(i64 %lhs, i16 *%src, i64 *%dst) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: cgh %r2, -2(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -1 @@ -62,7 +62,7 @@ define void @f4(i64 %lhs, i16 *%src, i64 *%dst) { ; Check the low end of the CGH range. define void @f5(i64 %lhs, i16 *%src, i64 *%dst) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cgh %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -262144 @@ -77,7 +77,7 @@ define void @f5(i64 %lhs, i16 *%src, i64 *%dst) { ; Check the next halfword down, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f6(i64 %lhs, i16 *%src, i64 *%dst) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, -524290 ; CHECK: cgh %r2, 0(%r3) ; CHECK: br %r14 @@ -92,7 +92,7 @@ define void @f6(i64 %lhs, i16 *%src, i64 *%dst) { ; Check that CGH allows an index. define void @f7(i64 %lhs, i64 %base, i64 %index, i64 *%dst) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: cgh %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -105,3 +105,17 @@ define void @f7(i64 %lhs, i64 %base, i64 %index, i64 *%dst) { store i64 %res, i64 *%dst ret void } + +; Check the comparison can be reversed if that allows CGH to be used. +define double @f8(double %a, double %b, i64 %rhs, i16 *%src) { +; CHECK-LABEL: f8: +; CHECK: cgh %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %half = load i16 *%src + %lhs = sext i16 %half to i64 + %cond = icmp slt i64 %lhs, %rhs + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-05.ll b/test/CodeGen/SystemZ/int-cmp-05.ll index 2ab64d5319a80..f15b76bb87fe3 100644 --- a/test/CodeGen/SystemZ/int-cmp-05.ll +++ b/test/CodeGen/SystemZ/int-cmp-05.ll @@ -2,11 +2,13 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Check signed register comparison. 
define double @f1(double %a, double %b, i64 %i1, i32 %unext) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cgfr %r2, %r3 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = sext i32 %unext to i64 @@ -17,7 +19,7 @@ define double @f1(double %a, double %b, i64 %i1, i32 %unext) { ; Check unsigned register comparison, which can't use CGFR. define double @f2(double %a, double %b, i64 %i1, i32 %unext) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: cgfr ; CHECK: br %r14 %i2 = sext i32 %unext to i64 @@ -28,9 +30,9 @@ define double @f2(double %a, double %b, i64 %i1, i32 %unext) { ; Check register equality. define double @f3(double %a, double %b, i64 %i1, i32 %unext) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cgfr %r2, %r3 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = sext i32 %unext to i64 @@ -41,9 +43,9 @@ define double @f3(double %a, double %b, i64 %i1, i32 %unext) { ; Check register inequality. define double @f4(double %a, double %b, i64 %i1, i32 %unext) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: cgfr %r2, %r3 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = sext i32 %unext to i64 @@ -52,11 +54,11 @@ define double @f4(double %a, double %b, i64 %i1, i32 %unext) { ret double %res } -; Check signed comparisonn with memory. +; Check signed comparison with memory. define double @f5(double %a, double %b, i64 %i1, i32 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %unext = load i32 *%ptr @@ -68,7 +70,7 @@ define double @f5(double %a, double %b, i64 %i1, i32 *%ptr) { ; Check unsigned comparison with memory. define double @f6(double %a, double %b, i64 %i1, i32 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: cgf ; CHECK: br %r14 %unext = load i32 *%ptr @@ -80,9 +82,9 @@ define double @f6(double %a, double %b, i64 %i1, i32 *%ptr) { ; Check memory equality. 
define double @f7(double %a, double %b, i64 %i1, i32 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: cgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %unext = load i32 *%ptr @@ -94,9 +96,9 @@ define double @f7(double %a, double %b, i64 %i1, i32 *%ptr) { ; Check memory inequality. define double @f8(double %a, double %b, i64 %i1, i32 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %unext = load i32 *%ptr @@ -108,9 +110,9 @@ define double @f8(double %a, double %b, i64 %i1, i32 *%ptr) { ; Check the high end of the aligned CGF range. define double @f9(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: cgf %r2, 524284(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131071 @@ -124,10 +126,10 @@ define double @f9(double %a, double %b, i64 %i1, i32 *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f10(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r3, 524288 ; CHECK: cgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131072 @@ -140,9 +142,9 @@ define double @f10(double %a, double %b, i64 %i1, i32 *%base) { ; Check the high end of the negative aligned CGF range. define double @f11(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: cgf %r2, -4(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -1 @@ -155,9 +157,9 @@ define double @f11(double %a, double %b, i64 %i1, i32 *%base) { ; Check the low end of the CGF range. 
define double @f12(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: cgf %r2, -524288(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131072 @@ -171,10 +173,10 @@ define double @f12(double %a, double %b, i64 %i1, i32 *%base) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f13(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: agfi %r3, -524292 ; CHECK: cgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131073 @@ -187,9 +189,9 @@ define double @f13(double %a, double %b, i64 %i1, i32 *%base) { ; Check that CGF allows an index. define double @f14(double %a, double %b, i64 %i1, i64 %base, i64 %index) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: cgf %r2, 524284({{%r4,%r3|%r3,%r4}}) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -201,3 +203,104 @@ define double @f14(double %a, double %b, i64 %i1, i64 %base, i64 %index) { %res = select i1 %cond, double %a, double %b ret double %res } + +; Check that comparisons of spilled values can use CGF rather than CGFR. 
+define i64 @f15(i32 *%ptr0) { +; CHECK-LABEL: f15: +; CHECK: brasl %r14, foo@PLT +; CHECK: cgf {{%r[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = sext i32 %frob0 to i64 + %ext1 = sext i32 %frob1 to i64 + %ext2 = sext i32 %frob2 to i64 + %ext3 = sext i32 %frob3 to i64 + %ext4 = sext i32 %frob4 to i64 + %ext5 = sext i32 %frob5 to i64 + %ext6 = sext i32 %frob6 to i64 + %ext7 = sext i32 %frob7 to i64 + %ext8 = sext i32 %frob8 to i64 + %ext9 = sext i32 %frob9 to i64 + + %cmp0 = icmp slt i64 %ret, %ext0 + %cmp1 = icmp slt i64 %ret, %ext1 + %cmp2 = icmp slt i64 %ret, %ext2 + %cmp3 = icmp slt i64 %ret, %ext3 + %cmp4 = icmp slt i64 %ret, %ext4 + %cmp5 = icmp slt i64 %ret, %ext5 + %cmp6 = icmp slt i64 
%ret, %ext6 + %cmp7 = icmp slt i64 %ret, %ext7 + %cmp8 = icmp slt i64 %ret, %ext8 + %cmp9 = icmp slt i64 %ret, %ext9 + + %sel0 = select i1 %cmp0, i64 %ret, i64 0 + %sel1 = select i1 %cmp1, i64 %sel0, i64 1 + %sel2 = select i1 %cmp2, i64 %sel1, i64 2 + %sel3 = select i1 %cmp3, i64 %sel2, i64 3 + %sel4 = select i1 %cmp4, i64 %sel3, i64 4 + %sel5 = select i1 %cmp5, i64 %sel4, i64 5 + %sel6 = select i1 %cmp6, i64 %sel5, i64 6 + %sel7 = select i1 %cmp7, i64 %sel6, i64 7 + %sel8 = select i1 %cmp8, i64 %sel7, i64 8 + %sel9 = select i1 %cmp9, i64 %sel8, i64 9 + + ret i64 %sel9 +} + +; Check the comparison can be reversed if that allows CGF to be used. +define double @f16(double %a, double %b, i64 %i2, i32 *%ptr) { +; CHECK-LABEL: f16: +; CHECK: cgf %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i1 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-06.ll b/test/CodeGen/SystemZ/int-cmp-06.ll index 26f6dbfe6f229..8ab62e89ec39b 100644 --- a/test/CodeGen/SystemZ/int-cmp-06.ll +++ b/test/CodeGen/SystemZ/int-cmp-06.ll @@ -2,11 +2,13 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Check unsigned register comparison. define double @f1(double %a, double %b, i64 %i1, i32 %unext) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clgfr %r2, %r3 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = zext i32 %unext to i64 @@ -17,9 +19,9 @@ define double @f1(double %a, double %b, i64 %i1, i32 %unext) { ; ...and again with a different representation. 
define double @f2(double %a, double %b, i64 %i1, i64 %unext) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clgfr %r2, %r3 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = and i64 %unext, 4294967295 @@ -30,7 +32,7 @@ define double @f2(double %a, double %b, i64 %i1, i64 %unext) { ; Check signed register comparison, which can't use CLGFR. define double @f3(double %a, double %b, i64 %i1, i32 %unext) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: clgfr ; CHECK: br %r14 %i2 = zext i32 %unext to i64 @@ -41,7 +43,7 @@ define double @f3(double %a, double %b, i64 %i1, i32 %unext) { ; ...and again with a different representation define double @f4(double %a, double %b, i64 %i1, i64 %unext) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: clgfr ; CHECK: br %r14 %i2 = and i64 %unext, 4294967295 @@ -52,9 +54,9 @@ define double @f4(double %a, double %b, i64 %i1, i64 %unext) { ; Check register equality. define double @f5(double %a, double %b, i64 %i1, i32 %unext) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: clgfr %r2, %r3 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = zext i32 %unext to i64 @@ -65,9 +67,9 @@ define double @f5(double %a, double %b, i64 %i1, i32 %unext) { ; ...and again with a different representation define double @f6(double %a, double %b, i64 %i1, i64 %unext) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: clgfr %r2, %r3 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = and i64 %unext, 4294967295 @@ -78,9 +80,9 @@ define double @f6(double %a, double %b, i64 %i1, i64 %unext) { ; Check register inequality. 
define double @f7(double %a, double %b, i64 %i1, i32 %unext) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: clgfr %r2, %r3 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = zext i32 %unext to i64 @@ -91,9 +93,9 @@ define double @f7(double %a, double %b, i64 %i1, i32 %unext) { ; ...and again with a different representation define double @f8(double %a, double %b, i64 %i1, i64 %unext) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: clgfr %r2, %r3 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = and i64 %unext, 4294967295 @@ -102,11 +104,11 @@ define double @f8(double %a, double %b, i64 %i1, i64 %unext) { ret double %res } -; Check unsigned comparisonn with memory. +; Check unsigned comparison with memory. define double @f9(double %a, double %b, i64 %i1, i32 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: clgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %unext = load i32 *%ptr @@ -118,7 +120,7 @@ define double @f9(double %a, double %b, i64 %i1, i32 *%ptr) { ; Check signed comparison with memory. define double @f10(double %a, double %b, i64 %i1, i32 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: clgf ; CHECK: br %r14 %unext = load i32 *%ptr @@ -130,9 +132,9 @@ define double @f10(double %a, double %b, i64 %i1, i32 *%ptr) { ; Check memory equality. define double @f11(double %a, double %b, i64 %i1, i32 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: clgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %unext = load i32 *%ptr @@ -144,9 +146,9 @@ define double @f11(double %a, double %b, i64 %i1, i32 *%ptr) { ; Check memory inequality. 
define double @f12(double %a, double %b, i64 %i1, i32 *%ptr) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: clgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %unext = load i32 *%ptr @@ -158,9 +160,9 @@ define double @f12(double %a, double %b, i64 %i1, i32 *%ptr) { ; Check the high end of the aligned CLGF range. define double @f13(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: clgf %r2, 524284(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131071 @@ -174,10 +176,10 @@ define double @f13(double %a, double %b, i64 %i1, i32 *%base) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f14(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: agfi %r3, 524288 ; CHECK: clgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131072 @@ -190,9 +192,9 @@ define double @f14(double %a, double %b, i64 %i1, i32 *%base) { ; Check the high end of the negative aligned CLGF range. define double @f15(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: clgf %r2, -4(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -1 @@ -205,9 +207,9 @@ define double @f15(double %a, double %b, i64 %i1, i32 *%base) { ; Check the low end of the CLGF range. 
define double @f16(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: clgf %r2, -524288(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131072 @@ -221,10 +223,10 @@ define double @f16(double %a, double %b, i64 %i1, i32 *%base) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f17(double %a, double %b, i64 %i1, i32 *%base) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: agfi %r3, -524292 ; CHECK: clgf %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131073 @@ -237,9 +239,9 @@ define double @f17(double %a, double %b, i64 %i1, i32 *%base) { ; Check that CLGF allows an index. define double @f18(double %a, double %b, i64 %i1, i64 %base, i64 %index) { -; CHECK: f18: +; CHECK-LABEL: f18: ; CHECK: clgf %r2, 524284({{%r4,%r3|%r3,%r4}}) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -251,3 +253,104 @@ define double @f18(double %a, double %b, i64 %i1, i64 %base, i64 %index) { %res = select i1 %cond, double %a, double %b ret double %res } + +; Check that comparisons of spilled values can use CLGF rather than CLGFR. 
+define i64 @f19(i32 *%ptr0) { +; CHECK-LABEL: f19: +; CHECK: brasl %r14, foo@PLT +; CHECK: clgf {{%r[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = zext i32 %frob0 to i64 + %ext1 = zext i32 %frob1 to i64 + %ext2 = zext i32 %frob2 to i64 + %ext3 = zext i32 %frob3 to i64 + %ext4 = zext i32 %frob4 to i64 + %ext5 = zext i32 %frob5 to i64 + %ext6 = zext i32 %frob6 to i64 + %ext7 = zext i32 %frob7 to i64 + %ext8 = zext i32 %frob8 to i64 + %ext9 = zext i32 %frob9 to i64 + + %cmp0 = icmp ult i64 %ret, %ext0 + %cmp1 = icmp ult i64 %ret, %ext1 + %cmp2 = icmp ult i64 %ret, %ext2 + %cmp3 = icmp ult i64 %ret, %ext3 + %cmp4 = icmp ult i64 %ret, %ext4 + %cmp5 = icmp ult i64 %ret, %ext5 + %cmp6 = icmp ult i64 
%ret, %ext6 + %cmp7 = icmp ult i64 %ret, %ext7 + %cmp8 = icmp ult i64 %ret, %ext8 + %cmp9 = icmp ult i64 %ret, %ext9 + + %sel0 = select i1 %cmp0, i64 %ret, i64 0 + %sel1 = select i1 %cmp1, i64 %sel0, i64 1 + %sel2 = select i1 %cmp2, i64 %sel1, i64 2 + %sel3 = select i1 %cmp3, i64 %sel2, i64 3 + %sel4 = select i1 %cmp4, i64 %sel3, i64 4 + %sel5 = select i1 %cmp5, i64 %sel4, i64 5 + %sel6 = select i1 %cmp6, i64 %sel5, i64 6 + %sel7 = select i1 %cmp7, i64 %sel6, i64 7 + %sel8 = select i1 %cmp8, i64 %sel7, i64 8 + %sel9 = select i1 %cmp9, i64 %sel8, i64 9 + + ret i64 %sel9 +} + +; Check the comparison can be reversed if that allows CLGF to be used. +define double @f20(double %a, double %b, i64 %i2, i32 *%ptr) { +; CHECK-LABEL: f20: +; CHECK: clgf %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i1 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-07.ll b/test/CodeGen/SystemZ/int-cmp-07.ll index 1a6f6226dd9f0..530d1787a770c 100644 --- a/test/CodeGen/SystemZ/int-cmp-07.ll +++ b/test/CodeGen/SystemZ/int-cmp-07.ll @@ -4,9 +4,8 @@ ; Check CGR. define double @f1(double %a, double %b, i64 %i1, i64 %i2) { -; CHECK: f1: -; CHECK: cgr %r2, %r3 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f1: +; CHECK: cgrjl %r2, %r3 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, %i2 @@ -16,9 +15,9 @@ define double @f1(double %a, double %b, i64 %i1, i64 %i2) { ; Check CG with no displacement. define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cg %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = load i64 *%ptr @@ -29,9 +28,9 @@ define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) { ; Check the high end of the aligned CG range. 
define double @f3(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cg %r2, 524280(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 65535 @@ -44,10 +43,10 @@ define double @f3(double %a, double %b, i64 %i1, i64 *%base) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f4(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: cg %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 65536 @@ -59,9 +58,9 @@ define double @f4(double %a, double %b, i64 %i1, i64 *%base) { ; Check the high end of the negative aligned CG range. define double @f5(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cg %r2, -8(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -1 @@ -73,9 +72,9 @@ define double @f5(double %a, double %b, i64 %i1, i64 *%base) { ; Check the low end of the CG range. define double @f6(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: cg %r2, -524288(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -65536 @@ -88,10 +87,10 @@ define double @f6(double %a, double %b, i64 %i1, i64 *%base) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define double @f7(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524296 ; CHECK: cg %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -65537 @@ -103,9 +102,9 @@ define double @f7(double %a, double %b, i64 %i1, i64 *%base) { ; Check that CG allows an index. define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cg %r2, 524280({{%r4,%r3|%r3,%r4}}) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -116,3 +115,16 @@ define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) { %res = select i1 %cond, double %a, double %b ret double %res } + +; Check the comparison can be reversed if that allows CG to be used. +define double @f9(double %a, double %b, i64 %i2, i64 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: cg %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i1 = load i64 *%ptr + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-08.ll b/test/CodeGen/SystemZ/int-cmp-08.ll index 6e9a13e9cedef..ebf158a1144b9 100644 --- a/test/CodeGen/SystemZ/int-cmp-08.ll +++ b/test/CodeGen/SystemZ/int-cmp-08.ll @@ -4,9 +4,8 @@ ; Check CLGR. define double @f1(double %a, double %b, i64 %i1, i64 %i2) { -; CHECK: f1: -; CHECK: clgr %r2, %r3 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f1: +; CHECK: clgrjl %r2, %r3 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ult i64 %i1, %i2 @@ -16,9 +15,9 @@ define double @f1(double %a, double %b, i64 %i1, i64 %i2) { ; Check CLG with no displacement. 
define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clg %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %i2 = load i64 *%ptr @@ -29,9 +28,9 @@ define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) { ; Check the high end of the aligned CLG range. define double @f3(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: clg %r2, 524280(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 65535 @@ -44,10 +43,10 @@ define double @f3(double %a, double %b, i64 %i1, i64 *%base) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f4(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: clg %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 65536 @@ -59,9 +58,9 @@ define double @f4(double %a, double %b, i64 %i1, i64 *%base) { ; Check the high end of the negative aligned CLG range. define double @f5(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: clg %r2, -8(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -1 @@ -73,9 +72,9 @@ define double @f5(double %a, double %b, i64 %i1, i64 *%base) { ; Check the low end of the CLG range. define double @f6(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: clg %r2, -524288(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -65536 @@ -88,10 +87,10 @@ define double @f6(double %a, double %b, i64 %i1, i64 *%base) { ; Check the next doubleword down, which needs separate address logic. 
; Other sequences besides this one would be OK. define double @f7(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524296 ; CHECK: clg %r2, 0(%r3) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -65537 @@ -103,9 +102,9 @@ define double @f7(double %a, double %b, i64 %i1, i64 *%base) { ; Check that CLG allows an index. define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: clg %r2, 524280({{%r4,%r3|%r3,%r4}}) -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add1 = add i64 %base, %index @@ -116,3 +115,16 @@ define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) { %res = select i1 %cond, double %a, double %b ret double %res } + +; Check the comparison can be reversed if that allows CLG to be used. +define double @f9(double %a, double %b, i64 %i2, i64 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: clg %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i1 = load i64 *%ptr + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-09.ll b/test/CodeGen/SystemZ/int-cmp-09.ll index bb7213c6a436c..0eb8c6688c0cc 100644 --- a/test/CodeGen/SystemZ/int-cmp-09.ll +++ b/test/CodeGen/SystemZ/int-cmp-09.ll @@ -4,9 +4,8 @@ ; Check comparisons with 0. define double @f1(double %a, double %b, i32 %i1) { -; CHECK: f1: -; CHECK: chi %r2, 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f1: +; CHECK: cijl %r2, 0 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i32 %i1, 0 @@ -14,23 +13,45 @@ define double @f1(double %a, double %b, i32 %i1) { ret double %res } -; Check comparisons with 1. +; Check comparisons with 2. 
define double @f2(double %a, double %b, i32 %i1) { -; CHECK: f2: -; CHECK: chi %r2, 1 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f2: +; CHECK: cijl %r2, 2 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 - %cond = icmp slt i32 %i1, 1 + %cond = icmp slt i32 %i1, 2 %res = select i1 %cond, double %a, double %b ret double %res } -; Check the high end of the CHI range. +; Check the high end of the CIJ range. define double @f3(double %a, double %b, i32 %i1) { -; CHECK: f3: +; CHECK-LABEL: f3: +; CHECK: cijl %r2, 127 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use CHI instead. +define double @f4(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f4: +; CHECK: chi %r2, 128 +; CHECK-NEXT: jl +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CHI range. +define double @f5(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f5: ; CHECK: chi %r2, 32767 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i32 %i1, 32767 @@ -39,10 +60,10 @@ define double @f3(double %a, double %b, i32 %i1) { } ; Check the next value up, which must use CFI. -define double @f4(double %a, double %b, i32 %i1) { -; CHECK: f4: +define double @f6(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f6: ; CHECK: cfi %r2, 32768 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i32 %i1, 32768 @@ -51,10 +72,10 @@ define double @f4(double %a, double %b, i32 %i1) { } ; Check the high end of the signed 32-bit range. 
-define double @f5(double %a, double %b, i32 %i1) { -; CHECK: f5: +define double @f7(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f7: ; CHECK: cfi %r2, 2147483647 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i32 %i1, 2147483647 @@ -63,10 +84,10 @@ define double @f5(double %a, double %b, i32 %i1) { } ; Check the next value up, which should be treated as a negative value. -define double @f6(double %a, double %b, i32 %i1) { -; CHECK: f6: +define double @f8(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f8: ; CHECK: cfi %r2, -2147483648 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i32 %i1, 2147483648 @@ -74,11 +95,10 @@ define double @f6(double %a, double %b, i32 %i1) { ret double %res } -; Check the high end of the negative CHI range. -define double @f7(double %a, double %b, i32 %i1) { -; CHECK: f7: -; CHECK: chi %r2, -1 -; CHECK-NEXT: j{{g?}}l +; Check the high end of the negative CIJ range. +define double @f9(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f9: +; CHECK: cijl %r2, -1 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i32 %i1, -1 @@ -86,11 +106,34 @@ define double @f7(double %a, double %b, i32 %i1) { ret double %res } +; Check the low end of the CIJ range. +define double @f10(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f10: +; CHECK: cijl %r2, -128 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, -128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use CHI instead. +define double @f11(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f11: +; CHECK: chi %r2, -129 +; CHECK-NEXT: jl +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} + ; Check the low end of the CHI range. 
-define double @f8(double %a, double %b, i32 %i1) { -; CHECK: f8: +define double @f12(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f12: ; CHECK: chi %r2, -32768 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i32 %i1, -32768 @@ -99,10 +142,10 @@ define double @f8(double %a, double %b, i32 %i1) { } ; Check the next value down, which must use CFI instead. -define double @f9(double %a, double %b, i32 %i1) { -; CHECK: f9: +define double @f13(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f13: ; CHECK: cfi %r2, -32769 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i32 %i1, -32769 @@ -111,10 +154,10 @@ define double @f9(double %a, double %b, i32 %i1) { } ; Check the low end of the signed 32-bit range. -define double @f10(double %a, double %b, i32 %i1) { -; CHECK: f10: +define double @f14(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f14: ; CHECK: cfi %r2, -2147483648 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i32 %i1, -2147483648 @@ -123,13 +166,57 @@ define double @f10(double %a, double %b, i32 %i1) { } ; Check the next value down, which should be treated as a positive value. -define double @f11(double %a, double %b, i32 %i1) { -; CHECK: f11: +define double @f15(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f15: ; CHECK: cfi %r2, 2147483647 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i32 %i1, -2147483649 %res = select i1 %cond, double %a, double %b ret double %res } + +; Check that < 1 becomes <= 0. +define double @f16(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f16: +; CHECK: cijle %r2, 0 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that >= 1 becomes > 0. 
+define double @f17(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f17: +; CHECK: cijh %r2, 0 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp sge i32 %i1, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that > -1 becomes >= 0. +define double @f18(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f18: +; CHECK: cijhe %r2, 0 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp sgt i32 %i1, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that <= -1 becomes < 0. +define double @f19(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f19: +; CHECK: cijl %r2, 0 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp sle i32 %i1, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-10.ll b/test/CodeGen/SystemZ/int-cmp-10.ll index f2d3ccd64af62..4d4c4bbd20d10 100644 --- a/test/CodeGen/SystemZ/int-cmp-10.ll +++ b/test/CodeGen/SystemZ/int-cmp-10.ll @@ -2,12 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; Check a value near the low end of the range. We use CFI for comparisons -; with zero, or things that are equivalent to them. +; Check a value near the low end of the range. We use signed forms for +; comparisons with zero, or things that are equivalent to them. define double @f1(double %a, double %b, i32 %i1) { -; CHECK: f1: -; CHECK: clfi %r2, 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-LABEL: f1: +; CHECK: clijh %r2, 1 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ugt i32 %i1, 1 @@ -15,11 +14,34 @@ define double @f1(double %a, double %b, i32 %i1) { ret double %res } -; Check a value near the high end of the range. +; Check the top of the CLIJ range. 
define double @f2(double %a, double %b, i32 %i1) { -; CHECK: f2: +; CHECK-LABEL: f2: +; CHECK: clijl %r2, 255 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i32 %i1, 255 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which needs a separate comparison. +define double @f3(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f3: +; CHECK: clfi %r2, 256 +; CHECK: jl +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i32 %i1, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check a value near the high end of the range. +define double @f4(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f4: ; CHECK: clfi %r2, 4294967280 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ult i32 %i1, 4294967280 diff --git a/test/CodeGen/SystemZ/int-cmp-11.ll b/test/CodeGen/SystemZ/int-cmp-11.ll index 1bfb0c61cb90f..c74135a5d3931 100644 --- a/test/CodeGen/SystemZ/int-cmp-11.ll +++ b/test/CodeGen/SystemZ/int-cmp-11.ll @@ -4,9 +4,8 @@ ; Check comparisons with 0. define double @f1(double %a, double %b, i64 %i1) { -; CHECK: f1: -; CHECK: cghi %r2, 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f1: +; CHECK: cgijl %r2, 0 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, 0 @@ -16,9 +15,8 @@ define double @f1(double %a, double %b, i64 %i1) { ; Check comparisons with 1. define double @f2(double %a, double %b, i64 %i1) { -; CHECK: f2: -; CHECK: cghi %r2, 1 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f2: +; CHECK: cgijle %r2, 0 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, 1 @@ -26,11 +24,34 @@ define double @f2(double %a, double %b, i64 %i1) { ret double %res } -; Check the high end of the CGHI range. +; Check the high end of the CGIJ range. 
define double @f3(double %a, double %b, i64 %i1) { -; CHECK: f3: +; CHECK-LABEL: f3: +; CHECK: cgijl %r2, 127 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use CGHI instead. +define double @f4(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f4: +; CHECK: cghi %r2, 128 +; CHECK-NEXT: jl +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGHI range. +define double @f5(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f5: ; CHECK: cghi %r2, 32767 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, 32767 @@ -39,10 +60,10 @@ define double @f3(double %a, double %b, i64 %i1) { } ; Check the next value up, which must use CGFI. -define double @f4(double %a, double %b, i64 %i1) { -; CHECK: f4: +define double @f6(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f6: ; CHECK: cgfi %r2, 32768 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, 32768 @@ -51,10 +72,10 @@ define double @f4(double %a, double %b, i64 %i1) { } ; Check the high end of the CGFI range. -define double @f5(double %a, double %b, i64 %i1) { -; CHECK: f5: +define double @f7(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f7: ; CHECK: cgfi %r2, 2147483647 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, 2147483647 @@ -63,10 +84,9 @@ define double @f5(double %a, double %b, i64 %i1) { } ; Check the next value up, which must use register comparison. 
-define double @f6(double %a, double %b, i64 %i1) { -; CHECK: f6: -; CHECK: cgr -; CHECK-NEXT: j{{g?}}l +define double @f8(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f8: +; CHECK: cgrjl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, 2147483648 @@ -74,11 +94,10 @@ define double @f6(double %a, double %b, i64 %i1) { ret double %res } -; Check the high end of the negative CGHI range. -define double @f7(double %a, double %b, i64 %i1) { -; CHECK: f7: -; CHECK: cghi %r2, -1 -; CHECK-NEXT: j{{g?}}l +; Check the high end of the negative CGIJ range. +define double @f9(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f9: +; CHECK: cgijl %r2, -1 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, -1 @@ -86,11 +105,34 @@ define double @f7(double %a, double %b, i64 %i1) { ret double %res } +; Check the low end of the CGIJ range. +define double @f10(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f10: +; CHECK: cgijl %r2, -128 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, -128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use CGHI instead. +define double @f11(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f11: +; CHECK: cghi %r2, -129 +; CHECK-NEXT: jl +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} + ; Check the low end of the CGHI range. -define double @f8(double %a, double %b, i64 %i1) { -; CHECK: f8: +define double @f12(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f12: ; CHECK: cghi %r2, -32768 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, -32768 @@ -99,10 +141,10 @@ define double @f8(double %a, double %b, i64 %i1) { } ; Check the next value down, which must use CGFI instead. 
-define double @f9(double %a, double %b, i64 %i1) { -; CHECK: f9: +define double @f13(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f13: ; CHECK: cgfi %r2, -32769 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, -32769 @@ -111,10 +153,10 @@ define double @f9(double %a, double %b, i64 %i1) { } ; Check the low end of the CGFI range. -define double @f10(double %a, double %b, i64 %i1) { -; CHECK: f10: +define double @f14(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f14: ; CHECK: cgfi %r2, -2147483648 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, -2147483648 @@ -123,10 +165,9 @@ define double @f10(double %a, double %b, i64 %i1) { } ; Check the next value down, which must use register comparison. -define double @f11(double %a, double %b, i64 %i1) { -; CHECK: f11: -; CHECK: cgr -; CHECK-NEXT: j{{g?}}l +define double @f15(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f15: +; CHECK: cgrjl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp slt i64 %i1, -2147483649 diff --git a/test/CodeGen/SystemZ/int-cmp-12.ll b/test/CodeGen/SystemZ/int-cmp-12.ll index 0288730c3a80c..077b22423e061 100644 --- a/test/CodeGen/SystemZ/int-cmp-12.ll +++ b/test/CodeGen/SystemZ/int-cmp-12.ll @@ -2,12 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; Check a value near the low end of the range. We use CGFI for comparisons -; with zero, or things that are equivalent to them. +; Check a value near the low end of the range. We use signed forms for +; comparisons with zero, or things that are equivalent to them. 
define double @f1(double %a, double %b, i64 %i1) { -; CHECK: f1: -; CHECK: clgfi %r2, 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-LABEL: f1: +; CHECK: clgijh %r2, 1 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ugt i64 %i1, 1 @@ -15,11 +14,34 @@ define double @f1(double %a, double %b, i64 %i1) { ret double %res } -; Check the high end of the CLGFI range. +; Check the top of the CLGIJ range. define double @f2(double %a, double %b, i64 %i1) { -; CHECK: f2: +; CHECK-LABEL: f2: +; CHECK: clgijl %r2, 255 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i64 %i1, 255 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which needs a separate comparison. +define double @f3(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f3: +; CHECK: clgfi %r2, 256 +; CHECK: jl +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i64 %i1, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLGFI range. +define double @f4(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f4: ; CHECK: clgfi %r2, 4294967295 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ult i64 %i1, 4294967295 @@ -28,10 +50,9 @@ define double @f2(double %a, double %b, i64 %i1) { } ; Check the next value up, which must use a register comparison. -define double @f3(double %a, double %b, i64 %i1) { -; CHECK: f3: -; CHECK: clgr %r2, -; CHECK-NEXT: j{{g?}}l +define double @f5(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f5: +; CHECK: clgrjl %r2, ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ult i64 %i1, 4294967296 diff --git a/test/CodeGen/SystemZ/int-cmp-13.ll b/test/CodeGen/SystemZ/int-cmp-13.ll index c180831debb07..53af0c868a251 100644 --- a/test/CodeGen/SystemZ/int-cmp-13.ll +++ b/test/CodeGen/SystemZ/int-cmp-13.ll @@ -4,9 +4,8 @@ ; Check comparisons with 0. 
define double @f1(double %a, double %b, i64 %i1) { -; CHECK: f1: -; CHECK: cghi %r2, 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-LABEL: f1: +; CHECK: cgije %r2, 0 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, 0 @@ -14,11 +13,34 @@ define double @f1(double %a, double %b, i64 %i1) { ret double %res } -; Check the high end of the CGHI range. +; Check the high end of the CGIJ range. define double @f2(double %a, double %b, i64 %i1) { -; CHECK: f2: +; CHECK-LABEL: f2: +; CHECK: cgije %r2, 127 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use CGHI instead. +define double @f3(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f3: +; CHECK: cghi %r2, 128 +; CHECK-NEXT: je +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGHI range. +define double @f4(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f4: ; CHECK: cghi %r2, 32767 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, 32767 @@ -27,10 +49,10 @@ define double @f2(double %a, double %b, i64 %i1) { } ; Check the next value up, which must use CGFI. -define double @f3(double %a, double %b, i64 %i1) { -; CHECK: f3: +define double @f5(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f5: ; CHECK: cgfi %r2, 32768 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, 32768 @@ -39,10 +61,10 @@ define double @f3(double %a, double %b, i64 %i1) { } ; Check the high end of the CGFI range. 
-define double @f4(double %a, double %b, i64 %i1) { -; CHECK: f4: +define double @f6(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f6: ; CHECK: cgfi %r2, 2147483647 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, 2147483647 @@ -51,10 +73,10 @@ define double @f4(double %a, double %b, i64 %i1) { } ; Check the next value up, which should use CLGFI instead. -define double @f5(double %a, double %b, i64 %i1) { -; CHECK: f5: +define double @f7(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f7: ; CHECK: clgfi %r2, 2147483648 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, 2147483648 @@ -63,10 +85,10 @@ define double @f5(double %a, double %b, i64 %i1) { } ; Check the high end of the CLGFI range. -define double @f6(double %a, double %b, i64 %i1) { -; CHECK: f6: +define double @f8(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f8: ; CHECK: clgfi %r2, 4294967295 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, 4294967295 @@ -75,10 +97,9 @@ define double @f6(double %a, double %b, i64 %i1) { } ; Check the next value up, which must use a register comparison. -define double @f7(double %a, double %b, i64 %i1) { -; CHECK: f7: -; CHECK: cgr %r2, -; CHECK-NEXT: j{{g?}}e +define double @f9(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f9: +; CHECK: cgrje %r2, ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, 4294967296 @@ -86,11 +107,10 @@ define double @f7(double %a, double %b, i64 %i1) { ret double %res } -; Check the high end of the negative CGHI range. -define double @f8(double %a, double %b, i64 %i1) { -; CHECK: f8: -; CHECK: cghi %r2, -1 -; CHECK-NEXT: j{{g?}}e +; Check the high end of the negative CGIJ range. 
+define double @f10(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f10: +; CHECK: cgije %r2, -1 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, -1 @@ -98,11 +118,34 @@ define double @f8(double %a, double %b, i64 %i1) { ret double %res } +; Check the low end of the CGIJ range. +define double @f11(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f11: +; CHECK: cgije %r2, -128 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, -128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use CGHI instead. +define double @f12(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f12: +; CHECK: cghi %r2, -129 +; CHECK-NEXT: je +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} + ; Check the low end of the CGHI range. -define double @f9(double %a, double %b, i64 %i1) { -; CHECK: f9: +define double @f13(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f13: ; CHECK: cghi %r2, -32768 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, -32768 @@ -111,10 +154,10 @@ define double @f9(double %a, double %b, i64 %i1) { } ; Check the next value down, which must use CGFI instead. -define double @f10(double %a, double %b, i64 %i1) { -; CHECK: f10: +define double @f14(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f14: ; CHECK: cgfi %r2, -32769 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, -32769 @@ -123,10 +166,10 @@ define double @f10(double %a, double %b, i64 %i1) { } ; Check the low end of the CGFI range. 
-define double @f11(double %a, double %b, i64 %i1) { -; CHECK: f11: +define double @f15(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f15: ; CHECK: cgfi %r2, -2147483648 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, -2147483648 @@ -135,10 +178,9 @@ define double @f11(double %a, double %b, i64 %i1) { } ; Check the next value down, which must use register comparison. -define double @f12(double %a, double %b, i64 %i1) { -; CHECK: f12: -; CHECK: cgr -; CHECK-NEXT: j{{g?}}e +define double @f16(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f16: +; CHECK: cgrje ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp eq i64 %i1, -2147483649 diff --git a/test/CodeGen/SystemZ/int-cmp-14.ll b/test/CodeGen/SystemZ/int-cmp-14.ll index 6a7e0e6d552a4..4dbd0ece3af6b 100644 --- a/test/CodeGen/SystemZ/int-cmp-14.ll +++ b/test/CodeGen/SystemZ/int-cmp-14.ll @@ -4,9 +4,8 @@ ; Check comparisons with 0. define double @f1(double %a, double %b, i64 %i1) { -; CHECK: f1: -; CHECK: cghi %r2, 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-LABEL: f1: +; CHECK: cgijlh %r2, 0 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, 0 @@ -14,11 +13,34 @@ define double @f1(double %a, double %b, i64 %i1) { ret double %res } -; Check the high end of the CGHI range. +; Check the high end of the CGIJ range. define double @f2(double %a, double %b, i64 %i1) { -; CHECK: f2: +; CHECK-LABEL: f2: +; CHECK: cgijlh %r2, 127 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use CGHI instead. +define double @f3(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f3: +; CHECK: cghi %r2, 128 +; CHECK-NEXT: jlh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGHI range. 
+define double @f4(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f4: ; CHECK: cghi %r2, 32767 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, 32767 @@ -27,10 +49,10 @@ define double @f2(double %a, double %b, i64 %i1) { } ; Check the next value up, which must use CGFI. -define double @f3(double %a, double %b, i64 %i1) { -; CHECK: f3: +define double @f5(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f5: ; CHECK: cgfi %r2, 32768 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, 32768 @@ -39,10 +61,10 @@ define double @f3(double %a, double %b, i64 %i1) { } ; Check the high end of the CGFI range. -define double @f4(double %a, double %b, i64 %i1) { -; CHECK: f4: +define double @f6(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f6: ; CHECK: cgfi %r2, 2147483647 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, 2147483647 @@ -51,10 +73,10 @@ define double @f4(double %a, double %b, i64 %i1) { } ; Check the next value up, which should use CLGFI instead. -define double @f5(double %a, double %b, i64 %i1) { -; CHECK: f5: +define double @f7(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f7: ; CHECK: clgfi %r2, 2147483648 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, 2147483648 @@ -63,10 +85,10 @@ define double @f5(double %a, double %b, i64 %i1) { } ; Check the high end of the CLGFI range. -define double @f6(double %a, double %b, i64 %i1) { -; CHECK: f6: +define double @f8(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f8: ; CHECK: clgfi %r2, 4294967295 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, 4294967295 @@ -75,10 +97,9 @@ define double @f6(double %a, double %b, i64 %i1) { } ; Check the next value up, which must use a register comparison. 
-define double @f7(double %a, double %b, i64 %i1) { -; CHECK: f7: -; CHECK: cgr %r2, -; CHECK-NEXT: j{{g?}}lh +define double @f9(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f9: +; CHECK: cgrjlh %r2, ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, 4294967296 @@ -86,11 +107,10 @@ define double @f7(double %a, double %b, i64 %i1) { ret double %res } -; Check the high end of the negative CGHI range. -define double @f8(double %a, double %b, i64 %i1) { -; CHECK: f8: -; CHECK: cghi %r2, -1 -; CHECK-NEXT: j{{g?}}lh +; Check the high end of the negative CGIJ range. +define double @f10(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f10: +; CHECK: cgijlh %r2, -1 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, -1 @@ -98,11 +118,34 @@ define double @f8(double %a, double %b, i64 %i1) { ret double %res } +; Check the low end of the CGIJ range. +define double @f11(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f11: +; CHECK: cgijlh %r2, -128 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, -128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use CGHI instead. +define double @f12(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f12: +; CHECK: cghi %r2, -129 +; CHECK-NEXT: jlh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} + ; Check the low end of the CGHI range. -define double @f9(double %a, double %b, i64 %i1) { -; CHECK: f9: +define double @f13(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f13: ; CHECK: cghi %r2, -32768 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, -32768 @@ -111,10 +154,10 @@ define double @f9(double %a, double %b, i64 %i1) { } ; Check the next value down, which must use CGFI instead. 
-define double @f10(double %a, double %b, i64 %i1) { -; CHECK: f10: +define double @f14(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f14: ; CHECK: cgfi %r2, -32769 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, -32769 @@ -123,10 +166,10 @@ define double @f10(double %a, double %b, i64 %i1) { } ; Check the low end of the CGFI range. -define double @f11(double %a, double %b, i64 %i1) { -; CHECK: f11: +define double @f15(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f15: ; CHECK: cgfi %r2, -2147483648 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, -2147483648 @@ -135,10 +178,9 @@ define double @f11(double %a, double %b, i64 %i1) { } ; Check the next value down, which must use register comparison. -define double @f12(double %a, double %b, i64 %i1) { -; CHECK: f12: -; CHECK: cgr -; CHECK-NEXT: j{{g?}}lh +define double @f16(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f16: +; CHECK: cgrjlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ne i64 %i1, -2147483649 diff --git a/test/CodeGen/SystemZ/int-cmp-15.ll b/test/CodeGen/SystemZ/int-cmp-15.ll index 6bb7e2b3ac3d6..48a068e49e8f3 100644 --- a/test/CodeGen/SystemZ/int-cmp-15.ll +++ b/test/CodeGen/SystemZ/int-cmp-15.ll @@ -4,9 +4,9 @@ ; Check ordered comparisons near the low end of the unsigned 8-bit range. define double @f1(double %a, double %b, i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cli 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp ugt i8 %val, 1 @@ -16,9 +16,9 @@ define double @f1(double %a, double %b, i8 *%ptr) { ; Check ordered comparisons near the high end of the unsigned 8-bit range. 
define double @f2(double %a, double %b, i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cli 0(%r2), 254 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp ult i8 %val, 254 @@ -28,9 +28,9 @@ define double @f2(double %a, double %b, i8 *%ptr) { ; Check tests for negative bytes. define double @f3(double %a, double %b, i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp slt i8 %val, 0 @@ -40,9 +40,9 @@ define double @f3(double %a, double %b, i8 *%ptr) { ; ...and an alternative form. define double @f4(double %a, double %b, i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp sle i8 %val, -1 @@ -52,9 +52,9 @@ define double @f4(double %a, double %b, i8 *%ptr) { ; Check tests for non-negative bytes. define double @f5(double %a, double %b, i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp sge i8 %val, 0 @@ -64,9 +64,9 @@ define double @f5(double %a, double %b, i8 *%ptr) { ; ...and an alternative form. define double @f6(double %a, double %b, i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp sgt i8 %val, -1 @@ -76,9 +76,9 @@ define double @f6(double %a, double %b, i8 *%ptr) { ; Check equality comparisons at the low end of the signed 8-bit range. 
define double @f7(double %a, double %b, i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp eq i8 %val, -128 @@ -88,9 +88,9 @@ define double @f7(double %a, double %b, i8 *%ptr) { ; Check equality comparisons at the low end of the unsigned 8-bit range. define double @f8(double %a, double %b, i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cli 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp eq i8 %val, 0 @@ -100,9 +100,9 @@ define double @f8(double %a, double %b, i8 *%ptr) { ; Check equality comparisons at the high end of the signed 8-bit range. define double @f9(double %a, double %b, i8 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp eq i8 %val, 127 @@ -112,9 +112,9 @@ define double @f9(double %a, double %b, i8 *%ptr) { ; Check equality comparisons at the high end of the unsigned 8-bit range. define double @f10(double %a, double %b, i8 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: cli 0(%r2), 255 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %cond = icmp eq i8 %val, 255 @@ -124,7 +124,7 @@ define double @f10(double %a, double %b, i8 *%ptr) { ; Check the high end of the CLI range. define double @f11(double %a, double %b, i8 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: cli 4095(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4095 @@ -136,7 +136,7 @@ define double @f11(double %a, double %b, i8 *%src) { ; Check the next byte up, which should use CLIY instead of CLI. 
define double @f12(double %a, double %b, i8 *%src) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: cliy 4096(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4096 @@ -148,7 +148,7 @@ define double @f12(double %a, double %b, i8 *%src) { ; Check the high end of the CLIY range. define double @f13(double %a, double %b, i8 *%src) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: cliy 524287(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -161,7 +161,7 @@ define double @f13(double %a, double %b, i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define double @f14(double %a, double %b, i8 *%src) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: agfi %r2, 524288 ; CHECK: cli 0(%r2), 127 ; CHECK: br %r14 @@ -174,7 +174,7 @@ define double @f14(double %a, double %b, i8 *%src) { ; Check the high end of the negative CLIY range. define double @f15(double %a, double %b, i8 *%src) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: cliy -1(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -186,7 +186,7 @@ define double @f15(double %a, double %b, i8 *%src) { ; Check the low end of the CLIY range. define double @f16(double %a, double %b, i8 *%src) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: cliy -524288(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -199,7 +199,7 @@ define double @f16(double %a, double %b, i8 *%src) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define double @f17(double %a, double %b, i8 *%src) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: agfi %r2, -524289 ; CHECK: cli 0(%r2), 127 ; CHECK: br %r14 @@ -212,7 +212,7 @@ define double @f17(double %a, double %b, i8 *%src) { ; Check that CLI does not allow an index define double @f18(double %a, double %b, i64 %base, i64 %index) { -; CHECK: f18: +; CHECK-LABEL: f18: ; CHECK: agr %r2, %r3 ; CHECK: cli 4095(%r2), 127 ; CHECK: br %r14 @@ -227,7 +227,7 @@ define double @f18(double %a, double %b, i64 %base, i64 %index) { ; Check that CLIY does not allow an index define double @f19(double %a, double %b, i64 %base, i64 %index) { -; CHECK: f19: +; CHECK-LABEL: f19: ; CHECK: agr %r2, %r3 ; CHECK: cliy 4096(%r2), 127 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/int-cmp-16.ll b/test/CodeGen/SystemZ/int-cmp-16.ll index 8af854efaabf0..be206d9c9470b 100644 --- a/test/CodeGen/SystemZ/int-cmp-16.ll +++ b/test/CodeGen/SystemZ/int-cmp-16.ll @@ -5,9 +5,9 @@ ; Check the low end of the 8-bit unsigned range, with zero extension. define double @f1(double %a, double %b, i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cli 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i8 *%ptr) { ; Check the high end of the 8-bit unsigned range, with zero extension. define double @f2(double %a, double %b, i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cli 0(%r2), 255 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i8 *%ptr) { ; Check the next value up, with zero extension. The condition is always false. 
define double @f3(double %a, double %b, i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i8 *%ptr) { ; Check comparisons with -1, with zero extension. ; This condition is also always false. define double @f4(double %a, double %b, i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i8 *%ptr) { ; Check comparisons with 0, using sign extension. define double @f5(double %a, double %b, i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cli 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i8 *%ptr) { ; Check the high end of the signed 8-bit range, using sign extension. define double @f6(double %a, double %b, i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i8 *%ptr) { ; Check the next value up, using sign extension. ; The condition is always false. define double @f7(double %a, double %b, i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i8 *%ptr) { ; Check comparisons with -1, using sign extension. define double @f8(double %a, double %b, i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cli 0(%r2), 255 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i8 *%ptr) { ; Check the low end of the signed 8-bit range, using sign extension. 
define double @f9(double %a, double %b, i8 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i8 *%ptr) { ; Check the next value down, using sign extension. ; The condition is always false. define double @f10(double %a, double %b, i8 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-17.ll b/test/CodeGen/SystemZ/int-cmp-17.ll index d4d5e98b83587..3df4ecc668031 100644 --- a/test/CodeGen/SystemZ/int-cmp-17.ll +++ b/test/CodeGen/SystemZ/int-cmp-17.ll @@ -5,9 +5,9 @@ ; Check the low end of the 8-bit unsigned range, with zero extension. define double @f1(double %a, double %b, i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cli 0(%r2), 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i8 *%ptr) { ; Check the high end of the 8-bit unsigned range, with zero extension. define double @f2(double %a, double %b, i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cli 0(%r2), 255 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i8 *%ptr) { ; Check the next value up, with zero extension. The condition is always false. define double @f3(double %a, double %b, i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i8 *%ptr) { ; Check comparisons with -1, with zero extension. ; This condition is also always false. 
define double @f4(double %a, double %b, i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i8 *%ptr) { ; Check comparisons with 0, using sign extension. define double @f5(double %a, double %b, i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cli 0(%r2), 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i8 *%ptr) { ; Check the high end of the signed 8-bit range, using sign extension. define double @f6(double %a, double %b, i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i8 *%ptr) { ; Check the next value up, using sign extension. ; The condition is always false. define double @f7(double %a, double %b, i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i8 *%ptr) { ; Check comparisons with -1, using sign extension. define double @f8(double %a, double %b, i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cli 0(%r2), 255 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i8 *%ptr) { ; Check the low end of the signed 8-bit range, using sign extension. define double @f9(double %a, double %b, i8 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i8 *%ptr) { ; Check the next value down, using sign extension. ; The condition is always false. 
define double @f10(double %a, double %b, i8 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-18.ll b/test/CodeGen/SystemZ/int-cmp-18.ll index 9822dc2122486..d03d6ac9a2c7b 100644 --- a/test/CodeGen/SystemZ/int-cmp-18.ll +++ b/test/CodeGen/SystemZ/int-cmp-18.ll @@ -5,9 +5,9 @@ ; Check the low end of the 8-bit unsigned range, with zero extension. define double @f1(double %a, double %b, i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cli 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i64 @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i8 *%ptr) { ; Check the high end of the 8-bit unsigned range, with zero extension. define double @f2(double %a, double %b, i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cli 0(%r2), 255 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i64 @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i8 *%ptr) { ; Check the next value up, with zero extension. The condition is always false. define double @f3(double %a, double %b, i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i8 *%ptr) { ; Check comparisons with -1, with zero extension. ; This condition is also always false. define double @f4(double %a, double %b, i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i8 *%ptr) { ; Check comparisons with 0, using sign extension. 
define double @f5(double %a, double %b, i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cli 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i8 *%ptr) { ; Check the high end of the signed 8-bit range, using sign extension. define double @f6(double %a, double %b, i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i8 *%ptr) { ; Check the next value up, using sign extension. ; The condition is always false. define double @f7(double %a, double %b, i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i8 *%ptr) { ; Check comparisons with -1, using sign extension. define double @f8(double %a, double %b, i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cli 0(%r2), 255 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i8 *%ptr) { ; Check the low end of the signed 8-bit range, using sign extension. define double @f9(double %a, double %b, i8 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i8 *%ptr) { ; Check the next value down, using sign extension. ; The condition is always false. 
define double @f10(double %a, double %b, i8 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-19.ll b/test/CodeGen/SystemZ/int-cmp-19.ll index 7d29dbcedcd65..b5f0856b40024 100644 --- a/test/CodeGen/SystemZ/int-cmp-19.ll +++ b/test/CodeGen/SystemZ/int-cmp-19.ll @@ -5,9 +5,9 @@ ; Check the low end of the 8-bit unsigned range, with zero extension. define double @f1(double %a, double %b, i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cli 0(%r2), 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i64 @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i8 *%ptr) { ; Check the high end of the 8-bit unsigned range, with zero extension. define double @f2(double %a, double %b, i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cli 0(%r2), 255 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i64 @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i8 *%ptr) { ; Check the next value up, with zero extension. The condition is always false. define double @f3(double %a, double %b, i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i8 *%ptr) { ; Check comparisons with -1, with zero extension. ; This condition is also always false. define double @f4(double %a, double %b, i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i8 *%ptr) { ; Check comparisons with 0, using sign extension. 
define double @f5(double %a, double %b, i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cli 0(%r2), 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i8 *%ptr) { ; Check the high end of the signed 8-bit range, using sign extension. define double @f6(double %a, double %b, i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i8 *%ptr) { ; Check the next value up, using sign extension. ; The condition is always false. define double @f7(double %a, double %b, i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i8 *%ptr) { ; Check comparisons with -1, using sign extension. define double @f8(double %a, double %b, i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cli 0(%r2), 255 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i8 *%ptr) { ; Check the low end of the signed 8-bit range, using sign extension. define double @f9(double %a, double %b, i8 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i8 *%ptr) { ; Check the next value down, using sign extension. ; The condition is always false. 
define double @f10(double %a, double %b, i8 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-20.ll b/test/CodeGen/SystemZ/int-cmp-20.ll index 8fffbc86a7378..98c41cd3a2409 100644 --- a/test/CodeGen/SystemZ/int-cmp-20.ll +++ b/test/CodeGen/SystemZ/int-cmp-20.ll @@ -6,9 +6,9 @@ ; Check unsigned comparison near the low end of the CLI range, using zero ; extension. define double @f1(double %a, double %b, i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cli 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -20,9 +20,9 @@ define double @f1(double %a, double %b, i8 *%ptr) { ; Check unsigned comparison near the low end of the CLI range, using sign ; extension. define double @f2(double %a, double %b, i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cli 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -34,9 +34,9 @@ define double @f2(double %a, double %b, i8 *%ptr) { ; Check unsigned comparison near the high end of the CLI range, using zero ; extension. define double @f3(double %a, double %b, i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cli 0(%r2), 254 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -48,9 +48,9 @@ define double @f3(double %a, double %b, i8 *%ptr) { ; Check unsigned comparison near the high end of the CLI range, using sign ; extension. define double @f4(double %a, double %b, i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: cli 0(%r2), 254 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -62,8 +62,8 @@ define double @f4(double %a, double %b, i8 *%ptr) { ; Check unsigned comparison above the high end of the CLI range, using zero ; extension. The condition is always true. 
define double @f5(double %a, double %b, i8 *%ptr) { -; CHECK: f5: -; CHECK-NOT: cli +; CHECK-LABEL: f5: +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -78,8 +78,8 @@ define double @f5(double %a, double %b, i8 *%ptr) { ; unlikely to occur in practice, we don't bother optimizing the second case, ; and simply ignore CLI for this range. First check the low end of the range. define double @f6(double %a, double %b, i8 *%ptr) { -; CHECK: f6: -; CHECK-NOT: cli +; CHECK-LABEL: f6: +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -90,8 +90,8 @@ define double @f6(double %a, double %b, i8 *%ptr) { ; ...and then the high end. define double @f7(double %a, double %b, i8 *%ptr) { -; CHECK: f7: -; CHECK-NOT: cli +; CHECK-LABEL: f7: +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -103,9 +103,9 @@ define double @f7(double %a, double %b, i8 *%ptr) { ; Check signed comparison near the low end of the CLI range, using zero ; extension. This is equivalent to unsigned comparison. define double @f8(double %a, double %b, i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cli 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -117,8 +117,8 @@ define double @f8(double %a, double %b, i8 *%ptr) { ; Check signed comparison near the low end of the CLI range, using sign ; extension. This cannot use CLI. define double @f9(double %a, double %b, i8 *%ptr) { -; CHECK: f9: -; CHECK-NOT: cli +; CHECK-LABEL: f9: +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -130,9 +130,9 @@ define double @f9(double %a, double %b, i8 *%ptr) { ; Check signed comparison near the high end of the CLI range, using zero ; extension. This is equivalent to unsigned comparison. 
define double @f10(double %a, double %b, i8 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: cli 0(%r2), 254 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -144,8 +144,8 @@ define double @f10(double %a, double %b, i8 *%ptr) { ; Check signed comparison near the high end of the CLI range, using sign ; extension. This cannot use CLI. define double @f11(double %a, double %b, i8 *%ptr) { -; CHECK: f11: -; CHECK-NOT: cli +; CHECK-LABEL: f11: +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -157,8 +157,8 @@ define double @f11(double %a, double %b, i8 *%ptr) { ; Check signed comparison above the high end of the CLI range, using zero ; extension. The condition is always true. define double @f12(double %a, double %b, i8 *%ptr) { -; CHECK: f12: -; CHECK-NOT: cli +; CHECK-LABEL: f12: +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -169,9 +169,9 @@ define double @f12(double %a, double %b, i8 *%ptr) { ; Check tests for nonnegative values. define double @f13(double %a, double %b, i8 *%ptr) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -182,9 +182,9 @@ define double @f13(double %a, double %b, i8 *%ptr) { ; ...and another form define double @f14(double %a, double %b, i8 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -195,9 +195,9 @@ define double @f14(double %a, double %b, i8 *%ptr) { ; Check tests for negative values. 
define double @f15(double %a, double %b, i8 *%ptr) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -208,9 +208,9 @@ define double @f15(double %a, double %b, i8 *%ptr) { ; ...and another form define double @f16(double %a, double %b, i8 *%ptr) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 diff --git a/test/CodeGen/SystemZ/int-cmp-21.ll b/test/CodeGen/SystemZ/int-cmp-21.ll index 43447b8fda078..ca9225dead92c 100644 --- a/test/CodeGen/SystemZ/int-cmp-21.ll +++ b/test/CodeGen/SystemZ/int-cmp-21.ll @@ -6,9 +6,9 @@ ; Check unsigned comparison near the low end of the CLI range, using zero ; extension. define double @f1(double %a, double %b, i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cli 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i64 @@ -20,9 +20,9 @@ define double @f1(double %a, double %b, i8 *%ptr) { ; Check unsigned comparison near the low end of the CLI range, using sign ; extension. define double @f2(double %a, double %b, i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: cli 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -34,9 +34,9 @@ define double @f2(double %a, double %b, i8 *%ptr) { ; Check unsigned comparison near the high end of the CLI range, using zero ; extension. define double @f3(double %a, double %b, i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cli 0(%r2), 254 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i64 @@ -48,9 +48,9 @@ define double @f3(double %a, double %b, i8 *%ptr) { ; Check unsigned comparison near the high end of the CLI range, using sign ; extension. 
define double @f4(double %a, double %b, i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: cli 0(%r2), 254 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -62,7 +62,7 @@ define double @f4(double %a, double %b, i8 *%ptr) { ; Check unsigned comparison above the high end of the CLI range, using zero ; extension. The condition is always true. define double @f5(double %a, double %b, i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -78,7 +78,7 @@ define double @f5(double %a, double %b, i8 *%ptr) { ; unlikely to occur in practice, we don't bother optimizing the second case, ; and simply ignore CLI for this range. First check the low end of the range. define double @f6(double %a, double %b, i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -90,7 +90,7 @@ define double @f6(double %a, double %b, i8 *%ptr) { ; ...and then the high end. define double @f7(double %a, double %b, i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -103,9 +103,9 @@ define double @f7(double %a, double %b, i8 *%ptr) { ; Check signed comparison near the low end of the CLI range, using zero ; extension. This is equivalent to unsigned comparison. define double @f8(double %a, double %b, i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cli 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i64 @@ -117,7 +117,7 @@ define double @f8(double %a, double %b, i8 *%ptr) { ; Check signed comparison near the low end of the CLI range, using sign ; extension. This cannot use CLI. 
define double @f9(double %a, double %b, i8 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -130,9 +130,9 @@ define double @f9(double %a, double %b, i8 *%ptr) { ; Check signed comparison near the high end of the CLI range, using zero ; extension. This is equivalent to unsigned comparison. define double @f10(double %a, double %b, i8 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: cli 0(%r2), 254 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i64 @@ -144,7 +144,7 @@ define double @f10(double %a, double %b, i8 *%ptr) { ; Check signed comparison near the high end of the CLI range, using sign ; extension. This cannot use CLI. define double @f11(double %a, double %b, i8 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -157,7 +157,7 @@ define double @f11(double %a, double %b, i8 *%ptr) { ; Check signed comparison above the high end of the CLI range, using zero ; extension. The condition is always true. define double @f12(double %a, double %b, i8 *%ptr) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i8 *%ptr @@ -169,9 +169,9 @@ define double @f12(double %a, double %b, i8 *%ptr) { ; Check tests for nonnegative values. define double @f13(double %a, double %b, i8 *%ptr) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -182,9 +182,9 @@ define double @f13(double %a, double %b, i8 *%ptr) { ; ...and another form define double @f14(double %a, double %b, i8 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: cli 0(%r2), 128 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -195,9 +195,9 @@ define double @f14(double %a, double %b, i8 *%ptr) { ; Check tests for negative values. 
define double @f15(double %a, double %b, i8 *%ptr) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 @@ -208,9 +208,9 @@ define double @f15(double %a, double %b, i8 *%ptr) { ; ...and another form define double @f16(double %a, double %b, i8 *%ptr) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: cli 0(%r2), 127 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i64 diff --git a/test/CodeGen/SystemZ/int-cmp-22.ll b/test/CodeGen/SystemZ/int-cmp-22.ll index 513d4be2f4235..43daec95b7d85 100644 --- a/test/CodeGen/SystemZ/int-cmp-22.ll +++ b/test/CodeGen/SystemZ/int-cmp-22.ll @@ -4,9 +4,9 @@ ; Check comparisons with 0. define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: chhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check comparisons with 1. define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: -; CHECK: chhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f2: +; CHECK: chhsi 0(%r2), 0 +; CHECK-NEXT: jle ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check a value near the high end of the signed 16-bit range. define double @f3(double %a, double %b, i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: chhsi 0(%r2), 32766 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i16 *%ptr) { ; Check comparisons with -1. 
define double @f4(double %a, double %b, i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: chhsi 0(%r2), -1 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) { ; Check a value near the low end of the 16-bit signed range. define double @f5(double %a, double %b, i16 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: chhsi 0(%r2), -32766 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) { ; Check the high end of the CHHSI range. define double @f6(double %a, double %b, i16 %i1, i16 *%base) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: chhsi 4094(%r3), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 2047 @@ -83,10 +83,10 @@ define double @f6(double %a, double %b, i16 %i1, i16 *%base) { ; Check the next halfword up, which needs separate address logic, define double @f7(double %a, double %b, i16 *%base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: aghi %r2, 4096 ; CHECK: chhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 2048 @@ -98,10 +98,10 @@ define double @f7(double %a, double %b, i16 *%base) { ; Check negative offsets, which also need separate address logic. define double @f8(double %a, double %b, i16 *%base) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: aghi %r2, -2 ; CHECK: chhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 -1 @@ -113,10 +113,10 @@ define double @f8(double %a, double %b, i16 *%base) { ; Check that CHHSI does not allow indices. 
define double @f9(double %a, double %b, i64 %base, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agr {{%r2, %r3|%r3, %r2}} ; CHECK: chhsi 0({{%r[23]}}), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add = add i64 %base, %index diff --git a/test/CodeGen/SystemZ/int-cmp-23.ll b/test/CodeGen/SystemZ/int-cmp-23.ll index 40e13310d55c8..99fe74b1c7874 100644 --- a/test/CodeGen/SystemZ/int-cmp-23.ll +++ b/test/CodeGen/SystemZ/int-cmp-23.ll @@ -4,9 +4,9 @@ ; Check a value near the low end of the unsigned 16-bit range. define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check a value near the high end of the unsigned 16-bit range. define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clhhsi 0(%r2), 65534 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check the high end of the CLHHSI range. 
define double @f3(double %a, double %b, i16 %i1, i16 *%base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: clhhsi 4094(%r3), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 2047 @@ -44,10 +44,10 @@ define double @f3(double %a, double %b, i16 %i1, i16 *%base) { ; Check the next halfword up, which needs separate address logic, define double @f4(double %a, double %b, i16 *%base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: clhhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 2048 @@ -59,10 +59,10 @@ define double @f4(double %a, double %b, i16 *%base) { ; Check negative offsets, which also need separate address logic. define double @f5(double %a, double %b, i16 *%base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: aghi %r2, -2 ; CHECK: clhhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 -1 @@ -74,10 +74,10 @@ define double @f5(double %a, double %b, i16 *%base) { ; Check that CLHHSI does not allow indices. define double @f6(double %a, double %b, i64 %base, i64 %index) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agr {{%r2, %r3|%r3, %r2}} ; CHECK: clhhsi 0({{%r[23]}}), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add = add i64 %base, %index diff --git a/test/CodeGen/SystemZ/int-cmp-24.ll b/test/CodeGen/SystemZ/int-cmp-24.ll index 46186cd74b534..1a8e587b0341e 100644 --- a/test/CodeGen/SystemZ/int-cmp-24.ll +++ b/test/CodeGen/SystemZ/int-cmp-24.ll @@ -4,9 +4,9 @@ ; Check the low end of the unsigned 16-bit range. 
define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check the high end of the unsigned 16-bit range. define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check the low end of the signed 16-bit range. define double @f3(double %a, double %b, i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: clhhsi 0(%r2), 32768 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i16 *%ptr) { ; Check the high end of the signed 16-bit range. define double @f4(double %a, double %b, i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: clhhsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-25.ll b/test/CodeGen/SystemZ/int-cmp-25.ll index a3a223fa34480..50803df1ba91a 100644 --- a/test/CodeGen/SystemZ/int-cmp-25.ll +++ b/test/CodeGen/SystemZ/int-cmp-25.ll @@ -4,9 +4,9 @@ ; Check the low end of the unsigned 16-bit range. define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check the high end of the unsigned 16-bit range. 
define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check the low end of the signed 16-bit range. define double @f3(double %a, double %b, i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: clhhsi 0(%r2), 32768 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr @@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i16 *%ptr) { ; Check the high end of the signed 16-bit range. define double @f4(double %a, double %b, i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: clhhsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i16 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-26.ll b/test/CodeGen/SystemZ/int-cmp-26.ll index 31330b2a63972..60778654b2755 100644 --- a/test/CodeGen/SystemZ/int-cmp-26.ll +++ b/test/CodeGen/SystemZ/int-cmp-26.ll @@ -5,9 +5,9 @@ ; Check the low end of the 16-bit unsigned range, with zero extension. define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i32 @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check the high end of the 16-bit unsigned range, with zero extension. define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i32 @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check the next value up, with zero extension. The condition is always false. 
define double @f3(double %a, double %b, i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i16 *%ptr) { ; Check comparisons with -1, with zero extension. ; This condition is also always false. define double @f4(double %a, double %b, i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) { ; Check comparisons with 0, using sign extension. define double @f5(double %a, double %b, i16 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) { ; Check the high end of the signed 16-bit range, using sign extension. define double @f6(double %a, double %b, i16 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: clhhsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i16 *%ptr) { ; Check the next value up, using sign extension. ; The condition is always false. define double @f7(double %a, double %b, i16 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i16 *%ptr) { ; Check comparisons with -1, using sign extension. define double @f8(double %a, double %b, i16 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i16 *%ptr) { ; Check the low end of the signed 16-bit range, using sign extension. 
define double @f9(double %a, double %b, i16 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: clhhsi 0(%r2), 32768 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) { ; Check the next value down, using sign extension. ; The condition is always false. define double @f10(double %a, double %b, i16 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-27.ll b/test/CodeGen/SystemZ/int-cmp-27.ll index 7cbea3d92526e..3102f5c5faa4c 100644 --- a/test/CodeGen/SystemZ/int-cmp-27.ll +++ b/test/CodeGen/SystemZ/int-cmp-27.ll @@ -5,9 +5,9 @@ ; Check the low end of the 16-bit unsigned range, with zero extension. define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i32 @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check the high end of the 16-bit unsigned range, with zero extension. define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i32 @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check the next value up, with zero extension. The condition is always false. define double @f3(double %a, double %b, i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i16 *%ptr) { ; Check comparisons with -1, with zero extension. ; This condition is also always false. 
define double @f4(double %a, double %b, i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) { ; Check comparisons with 0, using sign extension. define double @f5(double %a, double %b, i16 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) { ; Check the high end of the signed 16-bit range, using sign extension. define double @f6(double %a, double %b, i16 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: clhhsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i16 *%ptr) { ; Check the next value up, using sign extension. ; The condition is always false. define double @f7(double %a, double %b, i16 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i16 *%ptr) { ; Check comparisons with -1, using sign extension. define double @f8(double %a, double %b, i16 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i16 *%ptr) { ; Check the low end of the signed 16-bit range, using sign extension. 
define double @f9(double %a, double %b, i16 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: clhhsi 0(%r2), 32768 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) { ; Check the next value down, using sign extension. ; The condition is always false. define double @f10(double %a, double %b, i16 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-28.ll b/test/CodeGen/SystemZ/int-cmp-28.ll index 629eb4f06013f..c3b905974ebc3 100644 --- a/test/CodeGen/SystemZ/int-cmp-28.ll +++ b/test/CodeGen/SystemZ/int-cmp-28.ll @@ -5,9 +5,9 @@ ; Check the low end of the 16-bit unsigned range, with zero extension. define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i64 @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check the high end of the 16-bit unsigned range, with zero extension. define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i64 @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check the next value up, with zero extension. The condition is always false. define double @f3(double %a, double %b, i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i16 *%ptr) { ; Check comparisons with -1, with zero extension. ; This condition is also always false. 
define double @f4(double %a, double %b, i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) { ; Check comparisons with 0, using sign extension. define double @f5(double %a, double %b, i16 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) { ; Check the high end of the signed 16-bit range, using sign extension. define double @f6(double %a, double %b, i16 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: clhhsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i16 *%ptr) { ; Check the next value up, using sign extension. ; The condition is always false. define double @f7(double %a, double %b, i16 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i16 *%ptr) { ; Check comparisons with -1, using sign extension. define double @f8(double %a, double %b, i16 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i16 *%ptr) { ; Check the low end of the signed 16-bit range, using sign extension. define double @f9(double %a, double %b, i16 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: clhhsi 0(%r2), 32768 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) { ; Check the next value down, using sign extension. 
; The condition is always false. define double @f10(double %a, double %b, i16 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-29.ll b/test/CodeGen/SystemZ/int-cmp-29.ll index de41dd782d210..1b40d8cfb2ae3 100644 --- a/test/CodeGen/SystemZ/int-cmp-29.ll +++ b/test/CodeGen/SystemZ/int-cmp-29.ll @@ -5,9 +5,9 @@ ; Check the low end of the 16-bit unsigned range, with zero extension. define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i64 @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check the high end of the 16-bit unsigned range, with zero extension. define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i64 @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check the next value up, with zero extension. The condition is always false. define double @f3(double %a, double %b, i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i16 *%ptr) { ; Check comparisons with -1, with zero extension. ; This condition is also always false. define double @f4(double %a, double %b, i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) { ; Check comparisons with 0, using sign extension. 
define double @f5(double %a, double %b, i16 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: clhhsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) { ; Check the high end of the signed 16-bit range, using sign extension. define double @f6(double %a, double %b, i16 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: clhhsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i16 *%ptr) { ; Check the next value up, using sign extension. ; The condition is always false. define double @f7(double %a, double %b, i16 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i16 *%ptr) { ; Check comparisons with -1, using sign extension. define double @f8(double %a, double %b, i16 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: clhhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i16 *%ptr) { ; Check the low end of the signed 16-bit range, using sign extension. define double @f9(double %a, double %b, i16 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: clhhsi 0(%r2), 32768 -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) { ; Check the next value down, using sign extension. ; The condition is always false. 
define double @f10(double %a, double %b, i16 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-30.ll b/test/CodeGen/SystemZ/int-cmp-30.ll index 713ad8ef841d7..6c9498cb3320d 100644 --- a/test/CodeGen/SystemZ/int-cmp-30.ll +++ b/test/CodeGen/SystemZ/int-cmp-30.ll @@ -6,9 +6,9 @@ ; Check unsigned comparison near the low end of the CLHHSI range, using zero ; extension. define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i32 @@ -20,9 +20,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check unsigned comparison near the low end of the CLHHSI range, using sign ; extension. define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clhhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -34,9 +34,9 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check unsigned comparison near the high end of the CLHHSI range, using zero ; extension. define double @f3(double %a, double %b, i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: clhhsi 0(%r2), 65534 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i32 @@ -48,9 +48,9 @@ define double @f3(double %a, double %b, i16 *%ptr) { ; Check unsigned comparison near the high end of the CLHHSI range, using sign ; extension. 
define double @f4(double %a, double %b, i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: clhhsi 0(%r2), 65534 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -62,7 +62,7 @@ define double @f4(double %a, double %b, i16 *%ptr) { ; Check unsigned comparison above the high end of the CLHHSI range, using zero ; extension. The condition is always true. define double @f5(double %a, double %b, i16 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -79,7 +79,7 @@ define double @f5(double %a, double %b, i16 *%ptr) { ; and simply ignore CLHHSI for this range. First check the low end of the ; range. define double @f6(double %a, double %b, i16 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -91,7 +91,7 @@ define double @f6(double %a, double %b, i16 *%ptr) { ; ...and then the high end. define double @f7(double %a, double %b, i16 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -104,9 +104,9 @@ define double @f7(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the low end of the CLHHSI range, using zero ; extension. This is equivalent to unsigned comparison. define double @f8(double %a, double %b, i16 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: clhhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i32 @@ -118,9 +118,9 @@ define double @f8(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the low end of the CLHHSI range, using sign ; extension. This should use CHHSI instead. 
define double @f9(double %a, double %b, i16 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: chhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -132,9 +132,9 @@ define double @f9(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the high end of the CLHHSI range, using zero ; extension. This is equivalent to unsigned comparison. define double @f10(double %a, double %b, i16 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: clhhsi 0(%r2), 65534 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i32 @@ -146,9 +146,9 @@ define double @f10(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the high end of the CLHHSI range, using sign ; extension. This should use CHHSI instead. define double @f11(double %a, double %b, i16 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: chhsi 0(%r2), -2 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -160,7 +160,7 @@ define double @f11(double %a, double %b, i16 *%ptr) { ; Check signed comparison above the high end of the CLHHSI range, using zero ; extension. The condition is always true. define double @f12(double %a, double %b, i16 *%ptr) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i16 *%ptr @@ -173,9 +173,9 @@ define double @f12(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the high end of the CHHSI range, using sign ; extension. define double @f13(double %a, double %b, i16 *%ptr) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: chhsi 0(%r2), 32766 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -187,7 +187,7 @@ define double @f13(double %a, double %b, i16 *%ptr) { ; Check signed comparison above the high end of the CHHSI range, using sign ; extension. This condition is always true. 
define double @f14(double %a, double %b, i16 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK-NOT: chhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -200,9 +200,9 @@ define double @f14(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the low end of the CHHSI range, using sign ; extension. define double @f15(double %a, double %b, i16 *%ptr) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: chhsi 0(%r2), -32767 -; CHECK-NEXT: j{{g?}}g +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i32 @@ -214,7 +214,7 @@ define double @f15(double %a, double %b, i16 *%ptr) { ; Check signed comparison below the low end of the CHHSI range, using sign ; extension. This condition is always true. define double @f16(double %a, double %b, i16 *%ptr) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK-NOT: chhsi ; CHECK: br %r14 %val = load i16 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-31.ll b/test/CodeGen/SystemZ/int-cmp-31.ll index cabe9b83a1355..21539f20470e6 100644 --- a/test/CodeGen/SystemZ/int-cmp-31.ll +++ b/test/CodeGen/SystemZ/int-cmp-31.ll @@ -6,9 +6,9 @@ ; Check unsigned comparison near the low end of the CLHHSI range, using zero ; extension. define double @f1(double %a, double %b, i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i64 @@ -20,9 +20,9 @@ define double @f1(double %a, double %b, i16 *%ptr) { ; Check unsigned comparison near the low end of the CLHHSI range, using sign ; extension. define double @f2(double %a, double %b, i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clhhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -34,9 +34,9 @@ define double @f2(double %a, double %b, i16 *%ptr) { ; Check unsigned comparison near the high end of the CLHHSI range, using zero ; extension. 
define double @f3(double %a, double %b, i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: clhhsi 0(%r2), 65534 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i64 @@ -48,9 +48,9 @@ define double @f3(double %a, double %b, i16 *%ptr) { ; Check unsigned comparison near the high end of the CLHHSI range, using sign ; extension. define double @f4(double %a, double %b, i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: clhhsi 0(%r2), 65534 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -62,7 +62,7 @@ define double @f4(double %a, double %b, i16 *%ptr) { ; Check unsigned comparison above the high end of the CLHHSI range, using zero ; extension. The condition is always true. define double @f5(double %a, double %b, i16 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -79,7 +79,7 @@ define double @f5(double %a, double %b, i16 *%ptr) { ; and simply ignore CLHHSI for this range. First check the low end of the ; range. define double @f6(double %a, double %b, i16 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -91,7 +91,7 @@ define double @f6(double %a, double %b, i16 *%ptr) { ; ...and then the high end. define double @f7(double %a, double %b, i16 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: clhhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -104,9 +104,9 @@ define double @f7(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the low end of the CLHHSI range, using zero ; extension. This is equivalent to unsigned comparison. 
define double @f8(double %a, double %b, i16 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: clhhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i64 @@ -118,9 +118,9 @@ define double @f8(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the low end of the CLHHSI range, using sign ; extension. This should use CHHSI instead. define double @f9(double %a, double %b, i16 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: chhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -132,9 +132,9 @@ define double @f9(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the high end of the CLHHSI range, using zero ; extension. This is equivalent to unsigned comparison. define double @f10(double %a, double %b, i16 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: clhhsi 0(%r2), 65534 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = zext i16 %val to i64 @@ -146,9 +146,9 @@ define double @f10(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the high end of the CLHHSI range, using sign ; extension. This should use CHHSI instead. define double @f11(double %a, double %b, i16 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: chhsi 0(%r2), -2 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -160,7 +160,7 @@ define double @f11(double %a, double %b, i16 *%ptr) { ; Check signed comparison above the high end of the CLHHSI range, using zero ; extension. The condition is always true. define double @f12(double %a, double %b, i16 *%ptr) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK-NOT: cli ; CHECK: br %r14 %val = load i16 *%ptr @@ -173,9 +173,9 @@ define double @f12(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the high end of the CHHSI range, using sign ; extension. 
define double @f13(double %a, double %b, i16 *%ptr) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: chhsi 0(%r2), 32766 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -187,7 +187,7 @@ define double @f13(double %a, double %b, i16 *%ptr) { ; Check signed comparison above the high end of the CHHSI range, using sign ; extension. This condition is always true. define double @f14(double %a, double %b, i16 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK-NOT: chhsi ; CHECK: br %r14 %val = load i16 *%ptr @@ -200,9 +200,9 @@ define double @f14(double %a, double %b, i16 *%ptr) { ; Check signed comparison near the low end of the CHHSI range, using sign ; extension. define double @f15(double %a, double %b, i16 *%ptr) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: chhsi 0(%r2), -32767 -; CHECK-NEXT: j{{g?}}g +; CHECK-NEXT: jh ; CHECK: br %r14 %val = load i16 *%ptr %ext = sext i16 %val to i64 @@ -214,7 +214,7 @@ define double @f15(double %a, double %b, i16 *%ptr) { ; Check signed comparison below the low end of the CHHSI range, using sign ; extension. This condition is always true. define double @f16(double %a, double %b, i16 *%ptr) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK-NOT: chhsi ; CHECK: br %r14 %val = load i16 *%ptr diff --git a/test/CodeGen/SystemZ/int-cmp-32.ll b/test/CodeGen/SystemZ/int-cmp-32.ll index 4bdeebb35c99a..6596f9f3ad84b 100644 --- a/test/CodeGen/SystemZ/int-cmp-32.ll +++ b/test/CodeGen/SystemZ/int-cmp-32.ll @@ -4,9 +4,9 @@ ; Check ordered comparisons with 0. define double @f1(double %a, double %b, i32 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: chsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i32 *%ptr) { ; Check ordered comparisons with 1. 
define double @f2(double %a, double %b, i32 *%ptr) { -; CHECK: f2: -; CHECK: chsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f2: +; CHECK: chsi 0(%r2), 0 +; CHECK-NEXT: jle ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i32 *%ptr) { ; Check ordered comparisons with the high end of the signed 16-bit range. define double @f3(double %a, double %b, i32 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: chsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -43,7 +43,7 @@ define double @f3(double %a, double %b, i32 *%ptr) { ; Check the next value up, which can't use CHSI. define double @f4(double %a, double %b, i32 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: chsi ; CHECK: br %r14 %val = load i32 *%ptr @@ -54,9 +54,9 @@ define double @f4(double %a, double %b, i32 *%ptr) { ; Check ordered comparisons with -1. define double @f5(double %a, double %b, i32 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: chsi 0(%r2), -1 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -67,9 +67,9 @@ define double @f5(double %a, double %b, i32 *%ptr) { ; Check ordered comparisons with the low end of the 16-bit signed range. define double @f6(double %a, double %b, i32 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: chsi 0(%r2), -32768 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -80,7 +80,7 @@ define double @f6(double %a, double %b, i32 *%ptr) { ; Check the next value down, which can't use CHSI. define double @f7(double %a, double %b, i32 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: chsi ; CHECK: br %r14 %val = load i32 *%ptr @@ -91,9 +91,9 @@ define double @f7(double %a, double %b, i32 *%ptr) { ; Check equality comparisons with 0. 
define double @f8(double %a, double %b, i32 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: chsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -104,9 +104,9 @@ define double @f8(double %a, double %b, i32 *%ptr) { ; Check equality comparisons with 1. define double @f9(double %a, double %b, i32 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: chsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -117,9 +117,9 @@ define double @f9(double %a, double %b, i32 *%ptr) { ; Check equality comparisons with the high end of the signed 16-bit range. define double @f10(double %a, double %b, i32 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: chsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -130,7 +130,7 @@ define double @f10(double %a, double %b, i32 *%ptr) { ; Check the next value up, which can't use CHSI. define double @f11(double %a, double %b, i32 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK-NOT: chsi ; CHECK: br %r14 %val = load i32 *%ptr @@ -141,9 +141,9 @@ define double @f11(double %a, double %b, i32 *%ptr) { ; Check equality comparisons with -1. define double @f12(double %a, double %b, i32 *%ptr) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: chsi 0(%r2), -1 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -154,9 +154,9 @@ define double @f12(double %a, double %b, i32 *%ptr) { ; Check equality comparisons with the low end of the 16-bit signed range. 
define double @f13(double %a, double %b, i32 *%ptr) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: chsi 0(%r2), -32768 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -167,7 +167,7 @@ define double @f13(double %a, double %b, i32 *%ptr) { ; Check the next value down, which should be treated as a positive value. define double @f14(double %a, double %b, i32 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK-NOT: chsi ; CHECK: br %r14 %val = load i32 *%ptr @@ -178,9 +178,9 @@ define double @f14(double %a, double %b, i32 *%ptr) { ; Check the high end of the CHSI range. define double @f15(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: chsi 4092(%r3), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1023 @@ -192,10 +192,10 @@ define double @f15(double %a, double %b, i32 %i1, i32 *%base) { ; Check the next word up, which needs separate address logic, define double @f16(double %a, double %b, i32 *%base) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: aghi %r2, 4096 ; CHECK: chsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1024 @@ -207,10 +207,10 @@ define double @f16(double %a, double %b, i32 *%base) { ; Check negative offsets, which also need separate address logic. define double @f17(double %a, double %b, i32 *%base) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: aghi %r2, -4 ; CHECK: chsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -1 @@ -222,10 +222,10 @@ define double @f17(double %a, double %b, i32 *%base) { ; Check that CHSI does not allow indices. 
define double @f18(double %a, double %b, i64 %base, i64 %index) { -; CHECK: f18: +; CHECK-LABEL: f18: ; CHECK: agr {{%r2, %r3|%r3, %r2}} ; CHECK: chsi 0({{%r[23]}}), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add = add i64 %base, %index diff --git a/test/CodeGen/SystemZ/int-cmp-33.ll b/test/CodeGen/SystemZ/int-cmp-33.ll index 0144806d44654..e5a653b3815d0 100644 --- a/test/CodeGen/SystemZ/int-cmp-33.ll +++ b/test/CodeGen/SystemZ/int-cmp-33.ll @@ -5,9 +5,9 @@ ; Check ordered comparisons with a constant near the low end of the unsigned ; 16-bit range. define double @f1(double %a, double %b, i32 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clfhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i32 *%ptr) { ; Check ordered comparisons with the high end of the unsigned 16-bit range. define double @f2(double %a, double %b, i32 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clfhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i32 *%ptr) { ; Check the next value up, which can't use CLFHSI. define double @f3(double %a, double %b, i32 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: clfhsi ; CHECK: br %r14 %val = load i32 *%ptr @@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i32 *%ptr) { ; Check equality comparisons with 32768, the lowest value for which ; we prefer CLFHSI to CHSI. define double @f4(double %a, double %b, i32 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: clfhsi 0(%r2), 32768 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i32 *%ptr) { ; Check equality comparisons with the high end of the unsigned 16-bit range. 
define double @f5(double %a, double %b, i32 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: clfhsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i32 *%ptr @@ -69,7 +69,7 @@ define double @f5(double %a, double %b, i32 *%ptr) { ; Check the next value up, which can't use CLFHSI. define double @f6(double %a, double %b, i32 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: clfhsi ; CHECK: br %r14 %val = load i32 *%ptr @@ -80,9 +80,9 @@ define double @f6(double %a, double %b, i32 *%ptr) { ; Check the high end of the CLFHSI range. define double @f7(double %a, double %b, i32 %i1, i32 *%base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: clfhsi 4092(%r3), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1023 @@ -94,10 +94,10 @@ define double @f7(double %a, double %b, i32 %i1, i32 *%base) { ; Check the next word up, which needs separate address logic, define double @f8(double %a, double %b, i32 *%base) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: aghi %r2, 4096 ; CHECK: clfhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1024 @@ -109,10 +109,10 @@ define double @f8(double %a, double %b, i32 *%base) { ; Check negative offsets, which also need separate address logic. define double @f9(double %a, double %b, i32 *%base) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: aghi %r2, -4 ; CHECK: clfhsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -1 @@ -124,10 +124,10 @@ define double @f9(double %a, double %b, i32 *%base) { ; Check that CLFHSI does not allow indices. 
define double @f10(double %a, double %b, i64 %base, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agr {{%r2, %r3|%r3, %r2}} ; CHECK: clfhsi 0({{%r[23]}}), 1 -; CHECK-NEXT: j{{g?}}h +; CHECK-NEXT: jh ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add = add i64 %base, %index diff --git a/test/CodeGen/SystemZ/int-cmp-34.ll b/test/CodeGen/SystemZ/int-cmp-34.ll index b10bd4e080314..8a0219775a4ef 100644 --- a/test/CodeGen/SystemZ/int-cmp-34.ll +++ b/test/CodeGen/SystemZ/int-cmp-34.ll @@ -4,9 +4,9 @@ ; Check ordered comparisons with 0. define double @f1(double %a, double %b, i64 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cghsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i64 *%ptr) { ; Check ordered comparisons with 1. define double @f2(double %a, double %b, i64 *%ptr) { -; CHECK: f2: -; CHECK: cghsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}l +; CHECK-LABEL: f2: +; CHECK: cghsi 0(%r2), 0 +; CHECK-NEXT: jle ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i64 *%ptr) { ; Check ordered comparisons with the high end of the signed 16-bit range. define double @f3(double %a, double %b, i64 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cghsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -43,7 +43,7 @@ define double @f3(double %a, double %b, i64 *%ptr) { ; Check the next value up, which can't use CGHSI. define double @f4(double %a, double %b, i64 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: cghsi ; CHECK: br %r14 %val = load i64 *%ptr @@ -54,9 +54,9 @@ define double @f4(double %a, double %b, i64 *%ptr) { ; Check ordered comparisons with -1. 
define double @f5(double %a, double %b, i64 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: cghsi 0(%r2), -1 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -67,9 +67,9 @@ define double @f5(double %a, double %b, i64 *%ptr) { ; Check ordered comparisons with the low end of the 16-bit signed range. define double @f6(double %a, double %b, i64 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: cghsi 0(%r2), -32768 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -80,7 +80,7 @@ define double @f6(double %a, double %b, i64 *%ptr) { ; Check the next value down, which should be treated as a positive value. define double @f7(double %a, double %b, i64 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: cghsi ; CHECK: br %r14 %val = load i64 *%ptr @@ -91,9 +91,9 @@ define double @f7(double %a, double %b, i64 *%ptr) { ; Check equality comparisons with 0. define double @f8(double %a, double %b, i64 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: cghsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -104,9 +104,9 @@ define double @f8(double %a, double %b, i64 *%ptr) { ; Check equality comparisons with 1. define double @f9(double %a, double %b, i64 *%ptr) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: cghsi 0(%r2), 1 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -117,9 +117,9 @@ define double @f9(double %a, double %b, i64 *%ptr) { ; Check equality comparisons with the high end of the signed 16-bit range. 
define double @f10(double %a, double %b, i64 *%ptr) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: cghsi 0(%r2), 32767 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -130,7 +130,7 @@ define double @f10(double %a, double %b, i64 *%ptr) { ; Check the next value up, which can't use CGHSI. define double @f11(double %a, double %b, i64 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK-NOT: cghsi ; CHECK: br %r14 %val = load i64 *%ptr @@ -141,9 +141,9 @@ define double @f11(double %a, double %b, i64 *%ptr) { ; Check equality comparisons with -1. define double @f12(double %a, double %b, i64 *%ptr) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: cghsi 0(%r2), -1 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -154,9 +154,9 @@ define double @f12(double %a, double %b, i64 *%ptr) { ; Check equality comparisons with the low end of the 16-bit signed range. define double @f13(double %a, double %b, i64 *%ptr) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: cghsi 0(%r2), -32768 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -167,7 +167,7 @@ define double @f13(double %a, double %b, i64 *%ptr) { ; Check the next value down, which should be treated as a positive value. define double @f14(double %a, double %b, i64 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK-NOT: cghsi ; CHECK: br %r14 %val = load i64 *%ptr @@ -178,9 +178,9 @@ define double @f14(double %a, double %b, i64 *%ptr) { ; Check the high end of the CGHSI range. 
define double @f15(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: cghsi 4088(%r3), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 511 @@ -192,10 +192,10 @@ define double @f15(double %a, double %b, i64 %i1, i64 *%base) { ; Check the next doubleword up, which needs separate address logic, define double @f16(double %a, double %b, i64 *%base) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: aghi %r2, 4096 ; CHECK: cghsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 512 @@ -207,10 +207,10 @@ define double @f16(double %a, double %b, i64 *%base) { ; Check negative offsets, which also need separate address logic. define double @f17(double %a, double %b, i64 *%base) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: aghi %r2, -8 ; CHECK: cghsi 0(%r2), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -1 @@ -222,10 +222,10 @@ define double @f17(double %a, double %b, i64 *%base) { ; Check that CGHSI does not allow indices. define double @f18(double %a, double %b, i64 %base, i64 %index) { -; CHECK: f18: +; CHECK-LABEL: f18: ; CHECK: agr {{%r2, %r3|%r3, %r2}} ; CHECK: cghsi 0({{%r[23]}}), 0 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add = add i64 %base, %index diff --git a/test/CodeGen/SystemZ/int-cmp-35.ll b/test/CodeGen/SystemZ/int-cmp-35.ll index 9934906ba8d4d..539248a86a7b5 100644 --- a/test/CodeGen/SystemZ/int-cmp-35.ll +++ b/test/CodeGen/SystemZ/int-cmp-35.ll @@ -5,9 +5,9 @@ ; Check ordered comparisons with a constant near the low end of the unsigned ; 16-bit range. 
define double @f1(double %a, double %b, i64 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clghsi 0(%r2), 2 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i64 *%ptr) { ; Check ordered comparisons with the high end of the unsigned 16-bit range. define double @f2(double %a, double %b, i64 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clghsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i64 *%ptr) { ; Check the next value up, which can't use CLGHSI. define double @f3(double %a, double %b, i64 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: clghsi ; CHECK: br %r14 %val = load i64 *%ptr @@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i64 *%ptr) { ; Check equality comparisons with 32768, the lowest value for which ; we prefer CLGHSI to CGHSI. define double @f4(double %a, double %b, i64 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: clghsi 0(%r2), 32768 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i64 *%ptr) { ; Check equality comparisons with the high end of the unsigned 16-bit range. define double @f5(double %a, double %b, i64 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: clghsi 0(%r2), 65535 -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %val = load i64 *%ptr @@ -69,7 +69,7 @@ define double @f5(double %a, double %b, i64 *%ptr) { ; Check the next value up, which can't use CLGHSI. define double @f6(double %a, double %b, i64 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: clghsi ; CHECK: br %r14 %val = load i64 *%ptr @@ -80,9 +80,9 @@ define double @f6(double %a, double %b, i64 *%ptr) { ; Check the high end of the CLGHSI range. 
define double @f7(double %a, double %b, i64 %i1, i64 *%base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: clghsi 4088(%r3), 2 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 511 @@ -94,10 +94,10 @@ define double @f7(double %a, double %b, i64 %i1, i64 *%base) { ; Check the next doubleword up, which needs separate address logic, define double @f8(double %a, double %b, i64 *%base) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: aghi %r2, 4096 ; CHECK: clghsi 0(%r2), 2 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 512 @@ -109,10 +109,10 @@ define double @f8(double %a, double %b, i64 *%base) { ; Check negative offsets, which also need separate address logic. define double @f9(double %a, double %b, i64 *%base) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: aghi %r2, -8 ; CHECK: clghsi 0(%r2), 2 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -1 @@ -124,10 +124,10 @@ define double @f9(double %a, double %b, i64 *%base) { ; Check that CLGHSI does not allow indices. define double @f10(double %a, double %b, i64 %base, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agr {{%r2, %r3|%r3, %r2}} ; CHECK: clghsi 0({{%r[23]}}), 2 -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %add = add i64 %base, %index diff --git a/test/CodeGen/SystemZ/int-cmp-36.ll b/test/CodeGen/SystemZ/int-cmp-36.ll index 0813594325e43..fa2d4bf6c6178 100644 --- a/test/CodeGen/SystemZ/int-cmp-36.ll +++ b/test/CodeGen/SystemZ/int-cmp-36.ll @@ -4,12 +4,13 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s @g = global i16 1 +@h = global i16 1, align 1, section "foo" ; Check signed comparison. 
define i32 @f1(i32 %src1) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: chrl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %val = load i16 *@g @@ -26,7 +27,7 @@ exit: ; Check unsigned comparison, which cannot use CHRL. define i32 @f2(i32 %src1) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: chrl ; CHECK: br %r14 entry: @@ -44,9 +45,9 @@ exit: ; Check equality. define i32 @f3(i32 %src1) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: chrl %r2, g -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 entry: %val = load i16 *@g @@ -63,9 +64,9 @@ exit: ; Check inequality. define i32 @f4(i32 %src1) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: chrl %r2, g -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 entry: %val = load i16 *@g @@ -79,3 +80,42 @@ exit: %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] ret i32 %res } + +; Repeat f1 with an unaligned address. +define i32 @f5(i32 %src1) { +; CHECK-LABEL: f5: +; CHECK: lgrl [[REG:%r[0-5]]], h@GOT +; CHECK: ch %r2, 0([[REG]]) +; CHECK-NEXT: jl +; CHECK: br %r14 +entry: + %val = load i16 *@h, align 1 + %src2 = sext i16 %val to i32 + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check the comparison can be reversed if that allows CHRL to be used. 
+define i32 @f6(i32 %src2) { +; CHECK-LABEL: f6: +; CHECK: chrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src1 = sext i16 %val to i32 + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src2, %src2 + br label %exit +exit: + %res = phi i32 [ %src2, %entry ], [ %mul, %mulb ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-37.ll b/test/CodeGen/SystemZ/int-cmp-37.ll index aebd1f610d274..8095ed1730124 100644 --- a/test/CodeGen/SystemZ/int-cmp-37.ll +++ b/test/CodeGen/SystemZ/int-cmp-37.ll @@ -4,12 +4,13 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s @g = global i16 1 +@h = global i16 1, align 1, section "foo" ; Check unsigned comparison. define i32 @f1(i32 %src1) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clhrl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %val = load i16 *@g @@ -26,7 +27,7 @@ exit: ; Check signed comparison. define i32 @f2(i32 %src1) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: clhrl ; CHECK: br %r14 entry: @@ -44,9 +45,9 @@ exit: ; Check equality. define i32 @f3(i32 %src1) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: clhrl %r2, g -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 entry: %val = load i16 *@g @@ -63,9 +64,9 @@ exit: ; Check inequality. define i32 @f4(i32 %src1) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: clhrl %r2, g -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 entry: %val = load i16 *@g @@ -79,3 +80,42 @@ exit: %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] ret i32 %res } + +; Repeat f1 with an unaligned address. 
+define i32 @f5(i32 %src1) { +; CHECK-LABEL: f5: +; CHECK: lgrl [[REG:%r[0-5]]], h@GOT +; CHECK: llh [[VAL:%r[0-5]]], 0([[REG]]) +; CHECK: clrjl %r2, [[VAL]], +; CHECK: br %r14 +entry: + %val = load i16 *@h, align 1 + %src2 = zext i16 %val to i32 + %cond = icmp ult i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check the comparison can be reversed if that allows CLHRL to be used. +define i32 @f6(i32 %src2) { +; CHECK-LABEL: f6: +; CHECK: clhrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src1 = zext i16 %val to i32 + %cond = icmp ult i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src2, %src2 + br label %exit +exit: + %res = phi i32 [ %src2, %entry ], [ %mul, %mulb ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-38.ll b/test/CodeGen/SystemZ/int-cmp-38.ll index 347073027554a..9017583787298 100644 --- a/test/CodeGen/SystemZ/int-cmp-38.ll +++ b/test/CodeGen/SystemZ/int-cmp-38.ll @@ -4,12 +4,13 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s @g = global i32 1 +@h = global i32 1, align 2, section "foo" ; Check signed comparisons. define i32 @f1(i32 %src1) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: crl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %src2 = load i32 *@g @@ -25,9 +26,9 @@ exit: ; Check unsigned comparisons. define i32 @f2(i32 %src1) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clrl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %src2 = load i32 *@g @@ -43,9 +44,9 @@ exit: ; Check equality, which can use CRL or CLRL. define i32 @f3(i32 %src1) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: c{{l?}}rl %r2, g -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 entry: %src2 = load i32 *@g @@ -61,9 +62,9 @@ exit: ; ...likewise inequality. 
define i32 @f4(i32 %src1) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: c{{l?}}rl %r2, g -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 entry: %src2 = load i32 *@g @@ -76,3 +77,59 @@ exit: %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] ret i32 %res } + +; Repeat f1 with an unaligned address. +define i32 @f5(i32 %src1) { +; CHECK-LABEL: f5: +; CHECK: larl [[REG:%r[0-5]]], h +; CHECK: c %r2, 0([[REG]]) +; CHECK-NEXT: jl +; CHECK: br %r14 +entry: + %src2 = load i32 *@h, align 2 + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Repeat f2 with an unaligned address. +define i32 @f6(i32 %src1) { +; CHECK-LABEL: f6: +; CHECK: larl [[REG:%r[0-5]]], h +; CHECK: cl %r2, 0([[REG]]) +; CHECK-NEXT: jl +; CHECK: br %r14 +entry: + %src2 = load i32 *@h, align 2 + %cond = icmp ult i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check the comparison can be reversed if that allows CRL to be used. +define i32 @f7(i32 %src2) { +; CHECK-LABEL: f7: +; CHECK: crl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %src1 = load i32 *@g + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src2, %src2 + br label %exit +exit: + %res = phi i32 [ %src2, %entry ], [ %mul, %mulb ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-39.ll b/test/CodeGen/SystemZ/int-cmp-39.ll index 1129dce84a44e..fc9547d4ceb4d 100644 --- a/test/CodeGen/SystemZ/int-cmp-39.ll +++ b/test/CodeGen/SystemZ/int-cmp-39.ll @@ -4,12 +4,13 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s @g = global i16 1 +@h = global i16 1, align 1, section "foo" ; Check signed comparison. 
define i64 @f1(i64 %src1) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cghrl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %val = load i16 *@g @@ -26,7 +27,7 @@ exit: ; Check unsigned comparison, which cannot use CHRL. define i64 @f2(i64 %src1) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: cghrl ; CHECK: br %r14 entry: @@ -44,9 +45,9 @@ exit: ; Check equality. define i64 @f3(i64 %src1) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cghrl %r2, g -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 entry: %val = load i16 *@g @@ -63,9 +64,9 @@ exit: ; Check inequality. define i64 @f4(i64 %src1) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: cghrl %r2, g -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 entry: %val = load i16 *@g @@ -79,3 +80,42 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Repeat f1 with an unaligned address. +define i64 @f5(i64 %src1) { +; CHECK-LABEL: f5: +; CHECK: lgrl [[REG:%r[0-5]]], h@GOT +; CHECK: cgh %r2, 0([[REG]]) +; CHECK-NEXT: jl +; CHECK: br %r14 +entry: + %val = load i16 *@h, align 1 + %src2 = sext i16 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check the comparison can be reversed if that allows CGHRL to be used. 
+define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: cghrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src1 = sext i16 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-40.ll b/test/CodeGen/SystemZ/int-cmp-40.ll index 8d9fd9aa140a4..9c532f1cbc6b6 100644 --- a/test/CodeGen/SystemZ/int-cmp-40.ll +++ b/test/CodeGen/SystemZ/int-cmp-40.ll @@ -4,12 +4,13 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s @g = global i16 1 +@h = global i16 1, align 1, section "foo" ; Check unsigned comparison. define i64 @f1(i64 %src1) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clghrl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %val = load i16 *@g @@ -26,7 +27,7 @@ exit: ; Check signed comparison. define i64 @f2(i64 %src1) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: clghrl ; CHECK: br %r14 entry: @@ -44,9 +45,9 @@ exit: ; Check equality. define i64 @f3(i64 %src1) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: clghrl %r2, g -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 entry: %val = load i16 *@g @@ -63,9 +64,9 @@ exit: ; Check inequality. define i64 @f4(i64 %src1) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: clghrl %r2, g -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 entry: %val = load i16 *@g @@ -79,3 +80,42 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Repeat f1 with an unaligned address. 
+define i64 @f5(i64 %src1) { +; CHECK-LABEL: f5: +; CHECK: lgrl [[REG:%r[0-5]]], h@GOT +; CHECK: llgh [[VAL:%r[0-5]]], 0([[REG]]) +; CHECK: clgrjl %r2, [[VAL]], +; CHECK: br %r14 +entry: + %val = load i16 *@h, align 1 + %src2 = zext i16 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check the comparison can be reversed if that allows CLGHRL to be used. +define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: clghrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src1 = zext i16 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-41.ll b/test/CodeGen/SystemZ/int-cmp-41.ll index 0808bffe6d3e8..77f6e7d76f1c4 100644 --- a/test/CodeGen/SystemZ/int-cmp-41.ll +++ b/test/CodeGen/SystemZ/int-cmp-41.ll @@ -4,12 +4,13 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s @g = global i32 1 +@h = global i32 1, align 2, section "foo" ; Check signed comparison. define i64 @f1(i64 %src1) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cgfrl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %val = load i32 *@g @@ -26,7 +27,7 @@ exit: ; Check unsigned comparison, which cannot use CHRL. define i64 @f2(i64 %src1) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: cgfrl ; CHECK: br %r14 entry: @@ -44,9 +45,9 @@ exit: ; Check equality. define i64 @f3(i64 %src1) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: cgfrl %r2, g -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 entry: %val = load i32 *@g @@ -63,9 +64,9 @@ exit: ; Check inequality. 
define i64 @f4(i64 %src1) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: cgfrl %r2, g -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 entry: %val = load i32 *@g @@ -79,3 +80,42 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Repeat f1 with an unaligned address. +define i64 @f5(i64 %src1) { +; CHECK-LABEL: f5: +; CHECK: larl [[REG:%r[0-5]]], h +; CHECK: cgf %r2, 0([[REG]]) +; CHECK-NEXT: jl +; CHECK: br %r14 +entry: + %val = load i32 *@h, align 2 + %src2 = sext i32 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check the comparison can be reversed if that allows CGFRL to be used. +define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: cgfrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src1 = sext i32 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-42.ll b/test/CodeGen/SystemZ/int-cmp-42.ll index 5c67581dc29a7..94ef0082c4410 100644 --- a/test/CodeGen/SystemZ/int-cmp-42.ll +++ b/test/CodeGen/SystemZ/int-cmp-42.ll @@ -4,12 +4,13 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s @g = global i32 1 +@h = global i32 1, align 2, section "foo" ; Check unsigned comparison. define i64 @f1(i64 %src1) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: clgfrl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %val = load i32 *@g @@ -26,7 +27,7 @@ exit: ; Check signed comparison. define i64 @f2(i64 %src1) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: clgfrl ; CHECK: br %r14 entry: @@ -44,9 +45,9 @@ exit: ; Check equality. 
define i64 @f3(i64 %src1) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: clgfrl %r2, g -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 entry: %val = load i32 *@g @@ -63,9 +64,9 @@ exit: ; Check inequality. define i64 @f4(i64 %src1) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: clgfrl %r2, g -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 entry: %val = load i32 *@g @@ -79,3 +80,42 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Repeat f1 with an unaligned address. +define i64 @f5(i64 %src1) { +; CHECK-LABEL: f5: +; CHECK: larl [[REG:%r[0-5]]], h +; CHECK: clgf %r2, 0([[REG]]) +; CHECK-NEXT: jl +; CHECK: br %r14 +entry: + %val = load i32 *@h, align 2 + %src2 = zext i32 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check the comparison can be reversed if that allows CLGFRL to be used. +define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: clgfrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src1 = zext i32 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-43.ll b/test/CodeGen/SystemZ/int-cmp-43.ll index f387293b2b1b9..1a625886dec2b 100644 --- a/test/CodeGen/SystemZ/int-cmp-43.ll +++ b/test/CodeGen/SystemZ/int-cmp-43.ll @@ -4,12 +4,13 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s @g = global i64 1 +@h = global i64 1, align 4, section "foo" ; Check signed comparisons. 
define i64 @f1(i64 %src1) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: cgrl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %src2 = load i64 *@g @@ -25,9 +26,9 @@ exit: ; Check unsigned comparisons. define i64 @f2(i64 %src1) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: clgrl %r2, g -; CHECK-NEXT: j{{g?}}l +; CHECK-NEXT: jl ; CHECK: br %r14 entry: %src2 = load i64 *@g @@ -43,9 +44,9 @@ exit: ; Check equality, which can use CRL or CLRL. define i64 @f3(i64 %src1) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: c{{l?}}grl %r2, g -; CHECK-NEXT: j{{g?}}e +; CHECK-NEXT: je ; CHECK: br %r14 entry: %src2 = load i64 *@g @@ -61,9 +62,9 @@ exit: ; ...likewise inequality. define i64 @f4(i64 %src1) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: c{{l?}}grl %r2, g -; CHECK-NEXT: j{{g?}}lh +; CHECK-NEXT: jlh ; CHECK: br %r14 entry: %src2 = load i64 *@g @@ -76,3 +77,40 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Repeat f1 with an unaligned address. +define i64 @f5(i64 %src1) { +; CHECK-LABEL: f5: +; CHECK: larl [[REG:%r[0-5]]], h +; CHECK: cg %r2, 0([[REG]]) +; CHECK-NEXT: jl +; CHECK: br %r14 +entry: + %src2 = load i64 *@h, align 4 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check the comparison can be reversed if that allows CGRL to be used. 
+define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: cgrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %src1 = load i64 *@g + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-44.ll b/test/CodeGen/SystemZ/int-cmp-44.ll new file mode 100644 index 0000000000000..ae0133f10860d --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-44.ll @@ -0,0 +1,799 @@ +; Test that compares are ommitted if CC already has the right value +; (z10 version). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare void @foo() + +; Addition provides enough for equality comparisons with zero. First teest +; the EQ case. +define i32 @f1(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f1: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and again with NE. +define i32 @f2(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; SLT requires a comparison. +define i32 @f3(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijl %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...SLE too. 
+define i32 @f4(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f4: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijle %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...SGT too. +define i32 @f5(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f5: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijh %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...SGE too. +define i32 @f6(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f6: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijhe %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp sge i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Subtraction also provides enough for equality comparisons with zero. +define i32 @f7(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f7: +; CHECK: s %r2, 0(%r4) +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %cur = load i32 *%dest + %res = sub i32 %a, %cur + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...but not for ordered comparisons. +define i32 @f8(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f8: +; CHECK: s %r2, 0(%r4) +; CHECK-NEXT: cijl %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %cur = load i32 *%dest + %res = sub i32 %a, %cur + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Logic register-register instructions also provide enough for equality +; comparisons with zero. 
+define i32 @f9(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f9: +; CHECK: nr %r2, %r3 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, %b + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...but not for ordered comparisons. +define i32 @f10(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f10: +; CHECK: nr %r2, %r3 +; CHECK-NEXT: cijl %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, %b + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Logic register-immediate instructions also provide enough for equality +; comparisons with zero if the immediate covers the whole register. +define i32 @f11(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f11: +; CHECK: nilf %r2, 100000001 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, 100000001 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Partial logic register-immediate instructions do not provide simple +; zero results. +define i32 @f12(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f12: +; CHECK: nill %r2, 65436 +; CHECK-NEXT: cijlh %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, -100 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; SRA provides the same CC result as a comparison with zero. +define i32 @f13(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f13: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and again with NE. 
+define i32 @f14(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f14: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jlh .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SLT. +define i32 @f15(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f15: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SLE. +define i32 @f16(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f16: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jle .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SGT. +define i32 @f17(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f17: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SGE. +define i32 @f18(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f18: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jhe .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp sge i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; RISBG provides the same result as a comparison against zero. +; Test the EQ case. 
+define i64 @f19(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f19: +; CHECK: risbg %r2, %r3, 0, 190, 0 +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i64 %b, -2 + %cmp = icmp eq i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %b, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; ...and the SLT case. +define i64 @f20(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f20: +; CHECK: risbg %r2, %r3, 0, 190, 0 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i64 %b, -2 + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %b, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; Test a case where the register we're testing is set by a non-CC-clobbering +; instruction. +define i32 @f21(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f21: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: cije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + %res = call i32 asm "blah $0", "=r,0" (i32 %add) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and again with a CC-clobbering instruction. +define i32 @f22(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f22: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: cije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + %res = call i32 asm "blah $0", "=r,0,~{cc}" (i32 %add) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Check that stores do not interfere. 
+define i32 @f23(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) { +; CHECK-LABEL: f23: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: st %r2, 0(%r4) +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + store i32 %res, i32 *%dest1 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest2 + br label %exit + +exit: + ret i32 %res +} + +; Check that calls do interfere. +define void @f24(i32 *%ptr) { +; CHECK-LABEL: f24: +; CHECK: afi [[REG:%r[0-9]+]], 1000000 +; CHECK-NEXT: brasl %r14, foo@PLT +; CHECK-NEXT: cijlh [[REG]], 0, .L{{.*}} +; CHECK: br %r14 +entry: + %val = load i32 *%ptr + %xor = xor i32 %val, 1 + %add = add i32 %xor, 1000000 + call void @foo() + %cmp = icmp ne i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %add, i32 *%ptr + br label %exit + +exit: + ret void +} + +; Check that inline asms don't interfere if they don't clobber CC. +define void @f25(i32 %a, i32 *%ptr) { +; CHECK-LABEL: f25: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + call void asm sideeffect "blah", "r"(i32 %add) + %cmp = icmp ne i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %add, i32 *%ptr + br label %exit + +exit: + ret void +} + +; ...but do interfere if they do clobber CC. +define void @f26(i32 %a, i32 *%ptr) { +; CHECK-LABEL: f26: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: cijlh %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + call void asm sideeffect "blah", "r,~{cc}"(i32 %add) + %cmp = icmp ne i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %add, i32 *%ptr + br label %exit + +exit: + ret void +} + +; Test a case where CC is set based on a different register from the +; compare input. 
+define i32 @f27(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) { +; CHECK-LABEL: f27: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: sr %r3, %r2 +; CHECK-NEXT: st %r3, 0(%r4) +; CHECK-NEXT: cije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + %sub = sub i32 %b, %add + store i32 %sub, i32 *%dest1 + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %sub, i32 *%dest2 + br label %exit + +exit: + ret i32 %add +} + +; Make sure that we don't confuse a base register for a destination. +define void @f28(i64 %a, i64 *%dest) { +; CHECK-LABEL: f28: +; CHECK: xi 0(%r2), 15 +; CHECK: cgije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %ptr = inttoptr i64 %a to i8 * + %val = load i8 *%ptr + %xor = xor i8 %val, 15 + store i8 %xor, i8 *%ptr + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %a, i64 *%dest + br label %exit + +exit: + ret void +} + +; Test that L gets converted to LT where useful. +define i32 @f29(i64 %base, i64 %index, i32 *%dest) { +; CHECK-LABEL: f29: +; CHECK: lt %r2, 0({{%r2,%r3|%r3,%r2}}) +; CHECK-NEXT: jle .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %res = load i32 *%ptr + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %res, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Test that LY gets converted to LT where useful. +define i32 @f30(i64 %base, i64 %index, i32 *%dest) { +; CHECK-LABEL: f30: +; CHECK: lt %r2, 100000({{%r2,%r3|%r3,%r2}}) +; CHECK-NEXT: jle .L{{.*}} +; CHECK: br %r14 +entry: + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 100000 + %ptr = inttoptr i64 %add2 to i32 * + %res = load i32 *%ptr + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %res, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Test that LG gets converted to LTG where useful. 
+define i64 @f31(i64 %base, i64 %index, i64 *%dest) { +; CHECK-LABEL: f31: +; CHECK: ltg %r2, 0({{%r2,%r3|%r3,%r2}}) +; CHECK-NEXT: jhe .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i64 * + %res = load i64 *%ptr + %cmp = icmp sge i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %res, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; Test that LGF gets converted to LTGF where useful. +define i64 @f32(i64 %base, i64 %index, i64 *%dest) { +; CHECK-LABEL: f32: +; CHECK: ltgf %r2, 0({{%r2,%r3|%r3,%r2}}) +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %val = load i32 *%ptr + %res = sext i32 %val to i64 + %cmp = icmp sgt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %res, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; Test that LR gets converted to LTR where useful. +define i32 @f33(i32 %dummy, i32 %val, i32 *%dest) { +; CHECK-LABEL: f33: +; CHECK: ltr %r2, %r3 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{r2}"(i32 %val) + %cmp = icmp slt i32 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %val, i32 *%dest + br label %exit + +exit: + ret i32 %val +} + +; Test that LGR gets converted to LTGR where useful. +define i64 @f34(i64 %dummy, i64 %val, i64 *%dest) { +; CHECK-LABEL: f34: +; CHECK: ltgr %r2, %r3 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{r2}"(i64 %val) + %cmp = icmp sgt i64 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %val, i64 *%dest + br label %exit + +exit: + ret i64 %val +} + +; Test that LGFR gets converted to LTGFR where useful. 
+define i64 @f35(i64 %dummy, i32 %val, i64 *%dest) { +; CHECK-LABEL: f35: +; CHECK: ltgfr %r2, %r3 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %ext = sext i32 %val to i64 + call void asm sideeffect "blah $0", "{r2}"(i64 %ext) + %cmp = icmp sgt i64 %ext, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %ext, i64 *%dest + br label %exit + +exit: + ret i64 %ext +} + +; Test a case where it is the source rather than destination of LR that +; we need. +define i32 @f36(i32 %val, i32 %dummy, i32 *%dest) { +; CHECK-LABEL: f36: +; CHECK: ltr %r3, %r2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{r3}"(i32 %val) + %cmp = icmp slt i32 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %val, i32 *%dest + br label %exit + +exit: + ret i32 %val +} + +; Test a case where it is the source rather than destination of LGR that +; we need. +define i64 @f37(i64 %val, i64 %dummy, i64 *%dest) { +; CHECK-LABEL: f37: +; CHECK: ltgr %r3, %r2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{r3}"(i64 %val) + %cmp = icmp slt i64 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %val, i64 *%dest + br label %exit + +exit: + ret i64 %val +} + +; Test a case where it is the source rather than destination of LGFR that +; we need. 
+define i32 @f38(i32 %val, i64 %dummy, i32 *%dest) { +; CHECK-LABEL: f38: +; CHECK: ltgfr %r3, %r2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %ext = sext i32 %val to i64 + call void asm sideeffect "blah $0", "{r3}"(i64 %ext) + %cmp = icmp slt i32 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %val, i32 *%dest + br label %exit + +exit: + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/int-cmp-45.ll b/test/CodeGen/SystemZ/int-cmp-45.ll new file mode 100644 index 0000000000000..753a528e46c9d --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-45.ll @@ -0,0 +1,115 @@ +; Test that compares are ommitted if CC already has the right value +; (z196 version). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Addition provides enough for equality comparisons with zero. First teest +; the EQ case with LOC. +define i32 @f1(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f1: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: loce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %arg = select i1 %cmp, i32 %c, i32 %b + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with STOC. +define i32 @f2(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f2: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: stoce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %newval = select i1 %cmp, i32 %b, i32 %c + store i32 %newval, i32 *%cptr + ret i32 %add +} + +; Reverse the select order and test with LOCR. 
+define i32 @f3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f3: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: locrne %r3, %r4 +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with LOC. +define i32 @f4(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f4: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: locne %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with STOC. +define i32 @f5(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f5: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: stocne %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %newval = select i1 %cmp, i32 %c, i32 %b + store i32 %newval, i32 *%cptr + ret i32 %add +} + +; Change the EQ in f3 to NE. +define i32 @f6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f6: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: locre %r3, %r4 +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp ne i32 %add, 0 + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with LOC. +define i32 @f7(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f7: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: loce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp ne i32 %add, 0 + %c = load i32 *%cptr + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with STOC. 
+define i32 @f8(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f8: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: stoce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp ne i32 %add, 0 + %c = load i32 *%cptr + %newval = select i1 %cmp, i32 %c, i32 %b + store i32 %newval, i32 *%cptr + ret i32 %add +} diff --git a/test/CodeGen/SystemZ/int-cmp-46.ll b/test/CodeGen/SystemZ/int-cmp-46.ll new file mode 100644 index 0000000000000..f311942b9f862 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-46.ll @@ -0,0 +1,491 @@ +; Test the use of TEST UNDER MASK for 32-bit operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +@g = global i32 0 + +; Check the lowest useful TMLL value. +define void @f1(i32 %a) { +; CHECK-LABEL: f1: +; CHECK: tmll %r2, 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMLL range. +define void @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: tmll %r2, 65535 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 65535 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMLH value, which is the next value up. +define void @f3(i32 %a) { +; CHECK-LABEL: f3: +; CHECK: tmlh %r2, 1 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 65536 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the next value up again, which cannot use TM. 
+define void @f4(i32 %a) { +; CHECK-LABEL: f4: +; CHECK-NOT: {{tm[lh].}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 4294901759 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMLH range. +define void @f5(i32 %a) { +; CHECK-LABEL: f5: +; CHECK: tmlh %r2, 65535 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 4294901760 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for LT comparisons that are equivalent to +; an equality comparison with zero. +define void @f6(i32 %a) { +; CHECK-LABEL: f6: +; CHECK: tmll %r2, 240 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 240 + %cmp = icmp slt i32 %and, 16 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again with LE. +define void @f7(i32 %a) { +; CHECK-LABEL: f7: +; CHECK: tmll %r2, 240 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 240 + %cmp = icmp sle i32 %and, 15 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for GE comparisons that are equivalent to +; an inequality comparison with zero. +define void @f8(i32 %a) { +; CHECK-LABEL: f8: +; CHECK: tmll %r2, 240 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 240 + %cmp = icmp uge i32 %and, 16 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again with GT. 
+define void @f9(i32 %a) { +; CHECK-LABEL: f9: +; CHECK: tmll %r2, 240 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 240 + %cmp = icmp ugt i32 %and, 15 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for LT comparisons that effectively +; test whether the top bit is clear. +define void @f10(i32 %a) { +; CHECK-LABEL: f10: +; CHECK: tmll %r2, 35 +; CHECK: jle {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 35 + %cmp = icmp ult i32 %and, 8 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again with LE. +define void @f11(i32 %a) { +; CHECK-LABEL: f11: +; CHECK: tmll %r2, 35 +; CHECK: jle {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 35 + %cmp = icmp ule i32 %and, 31 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for GE comparisons that effectively test +; whether the top bit is set. +define void @f12(i32 %a) { +; CHECK-LABEL: f12: +; CHECK: tmll %r2, 140 +; CHECK: jnle {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 140 + %cmp = icmp uge i32 %and, 128 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again for GT. +define void @f13(i32 %a) { +; CHECK-LABEL: f13: +; CHECK: tmll %r2, 140 +; CHECK: jnle {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 140 + %cmp = icmp ugt i32 %and, 126 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for equality comparisons with the mask. 
+define void @f14(i32 %a) { +; CHECK-LABEL: f14: +; CHECK: tmll %r2, 101 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 101 + %cmp = icmp eq i32 %and, 101 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for inequality comparisons with the mask. +define void @f15(i32 %a) { +; CHECK-LABEL: f15: +; CHECK: tmll %r2, 65519 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 65519 + %cmp = icmp ne i32 %and, 65519 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for LT comparisons that are equivalent +; to inequality comparisons with the mask. +define void @f16(i32 %a) { +; CHECK-LABEL: f16: +; CHECK: tmll %r2, 130 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 130 + %cmp = icmp ult i32 %and, 129 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again with LE. +define void @f17(i32 %a) { +; CHECK-LABEL: f17: +; CHECK: tmll %r2, 130 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 130 + %cmp = icmp ule i32 %and, 128 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for GE comparisons that are equivalent +; to equality comparisons with the mask. +define void @f18(i32 %a) { +; CHECK-LABEL: f18: +; CHECK: tmll %r2, 194 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 194 + %cmp = icmp uge i32 %and, 193 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again for GT. 
+define void @f19(i32 %a) { +; CHECK-LABEL: f19: +; CHECK: tmll %r2, 194 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 194 + %cmp = icmp ugt i32 %and, 192 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for equality comparisons for the low bit +; when the mask has two bits. +define void @f20(i32 %a) { +; CHECK-LABEL: f20: +; CHECK: tmll %r2, 20 +; CHECK: jl {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 20 + %cmp = icmp eq i32 %and, 4 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for inequality comparisons for the low bit +; when the mask has two bits. +define void @f21(i32 %a) { +; CHECK-LABEL: f21: +; CHECK: tmll %r2, 20 +; CHECK: jnl {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 20 + %cmp = icmp ne i32 %and, 4 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for equality comparisons for the high bit +; when the mask has two bits. +define void @f22(i32 %a) { +; CHECK-LABEL: f22: +; CHECK: tmll %r2, 20 +; CHECK: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 20 + %cmp = icmp eq i32 %and, 16 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for inequality comparisons for the high bit +; when the mask has two bits. +define void @f23(i32 %a) { +; CHECK-LABEL: f23: +; CHECK: tmll %r2, 20 +; CHECK: jnh {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 20 + %cmp = icmp ne i32 %and, 16 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can fold an SHL into a TMxx mask. 
+define void @f24(i32 %a) { +; CHECK-LABEL: f24: +; CHECK: tmll %r2, 255 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %shl = shl i32 %a, 12 + %and = and i32 %shl, 1044480 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can fold an SHR into a TMxx mask. +define void @f25(i32 %a) { +; CHECK-LABEL: f25: +; CHECK: tmlh %r2, 512 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %shr = lshr i32 %a, 25 + %and = and i32 %shr, 1 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/int-cmp-47.ll b/test/CodeGen/SystemZ/int-cmp-47.ll new file mode 100644 index 0000000000000..9ebcbfe525bab --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-47.ll @@ -0,0 +1,234 @@ +; Test the use of TEST UNDER MASK for 64-bit operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +@g = global i32 0 + +; Check the lowest useful TMLL value. +define void @f1(i64 %a) { +; CHECK-LABEL: f1: +; CHECK: tmll %r2, 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 1 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMLL range. +define void @f2(i64 %a) { +; CHECK-LABEL: f2: +; CHECK: tmll %r2, 65535 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 65535 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMLH value, which is the next value up. 
+define void @f3(i64 %a) { +; CHECK-LABEL: f3: +; CHECK: tmlh %r2, 1 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 65536 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the next value up again, which cannot use TM. +define void @f4(i64 %a) { +; CHECK-LABEL: f4: +; CHECK-NOT: {{tm[lh].}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294901759 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMLH range. +define void @f5(i64 %a) { +; CHECK-LABEL: f5: +; CHECK: tmlh %r2, 65535 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294901760 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMHL value. +define void @f6(i64 %a) { +; CHECK-LABEL: f6: +; CHECK: tmhl %r2, 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294967296 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the next value up again, which cannot use TM. +define void @f7(i64 %a) { +; CHECK-LABEL: f7: +; CHECK-NOT: {{tm[lh].}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294967297 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMHL range. 
+define void @f8(i64 %a) { +; CHECK-LABEL: f8: +; CHECK: tmhl %r2, 65535 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 281470681743360 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMHH value. +define void @f9(i64 %a) { +; CHECK-LABEL: f9: +; CHECK: tmhh %r2, 1 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 281474976710656 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMHH range. +define void @f10(i64 %a) { +; CHECK-LABEL: f10: +; CHECK: tmhh %r2, 65535 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 18446462598732840960 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can fold an SHL into a TMxx mask. +define void @f11(i64 %a) { +; CHECK-LABEL: f11: +; CHECK: tmhl %r2, 32768 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %shl = shl i64 %a, 1 + %and = and i64 %shl, 281474976710656 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can fold an SHR into a TMxx mask. +define void @f12(i64 %a) { +; CHECK-LABEL: f12: +; CHECK: tmhh %r2, 256 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %shr = lshr i64 %a, 56 + %and = and i64 %shr, 1 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/int-cmp-48.ll b/test/CodeGen/SystemZ/int-cmp-48.ll new file mode 100644 index 0000000000000..d7c6370a23233 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-48.ll @@ -0,0 +1,245 @@ +; Test the use of TM and TMY. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +@g = global i32 0 + +; Check a simple branching use of TM. +define void @f1(i8 *%src) { +; CHECK-LABEL: f1: +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %byte = load i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + + +; Check that we do not fold across an aliasing store. +define void @f2(i8 *%src) { +; CHECK-LABEL: f2: +; CHECK: llc [[REG:%r[0-5]]], 0(%r2) +; CHECK: mvi 0(%r2), 0 +; CHECK: tmll [[REG]], 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %byte = load i8 *%src + store i8 0, i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check a simple select-based use of TM. +define double @f3(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f3: +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check that we do not fold across an aliasing store. +define double @f4(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f4: +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: mvi 0(%r2), 0 +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + store i8 0, i8 *%src + ret double %res +} + +; Check an inequality check. 
+define double @f5(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f5: +; CHECK: tm 0(%r2), 1 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp ne i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check that we can also use TM for equality comparisons with the mask. +define double @f6(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f6: +; CHECK: tm 0(%r2), 254 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 254 + %cmp = icmp eq i8 %and, 254 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check inequality comparisons with the mask. +define double @f7(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f7: +; CHECK: tm 0(%r2), 254 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 254 + %cmp = icmp ne i8 %and, 254 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check that we do not use the memory TM instruction when CC is being tested +; for 2. +define double @f8(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f8: +; CHECK: llc [[REG:%r[0-5]]], 0(%r2) +; CHECK: tmll [[REG]], 3 +; CHECK: jh {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 3 + %cmp = icmp eq i8 %and, 2 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; ...likewise 1. +define double @f9(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f9: +; CHECK: llc [[REG:%r[0-5]]], 0(%r2) +; CHECK: tmll [[REG]], 3 +; CHECK: jl {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 3 + %cmp = icmp eq i8 %and, 1 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the high end of the TM range. 
+define double @f10(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f10: +; CHECK: tm 4095(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4095 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the low end of the positive TMY range. +define double @f11(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f11: +; CHECK: tmy 4096(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4096 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the high end of the TMY range. +define double @f12(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f12: +; CHECK: tmy 524287(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the next byte up, which needs separate address logic. +define double @f13(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f13: +; CHECK: agfi %r2, 524288 +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the low end of the TMY range. +define double @f14(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f14: +; CHECK: tmy -524288(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the next byte down, which needs separate address logic. 
+define double @f15(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f15: +; CHECK: agfi %r2, -524289 +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check that TM(Y) does not allow an index +define double @f16(i8 *%src, i64 %index, double %a, double %b) { +; CHECK-LABEL: f16: +; CHECK: tm 0({{%r[1-5]}}), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 %index + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-49.ll b/test/CodeGen/SystemZ/int-cmp-49.ll new file mode 100644 index 0000000000000..83f18a2a18a62 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-49.ll @@ -0,0 +1,49 @@ +; That that we don't try to use z196 instructions on z10 for TMHH and TMHL. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -O0 | FileCheck %s + +@g = global i32 0 + +; Check the lowest useful TMHL value. +define void @f1(i64 %a) { +; CHECK-LABEL: f1: +; CHECK-NOT: risblg +; CHECK-NOT: risbhg +; CHECK: tmhl {{%r[0-5]}}, 1 +; CHECK-NOT: risblg +; CHECK-NOT: risbhg +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294967296 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMHH value. 
+define void @f2(i64 %a) { +; CHECK-LABEL: f2: +; CHECK-NOT: risblg +; CHECK-NOT: risbhg +; CHECK: tmhh {{%r[0-5]}}, 1 +; CHECK-NOT: risblg +; CHECK-NOT: risbhg +; CHECK: br %r14 +entry: + %and = and i64 %a, 281474976710656 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/int-const-01.ll b/test/CodeGen/SystemZ/int-const-01.ll index a580154e6b572..e94c05897faec 100644 --- a/test/CodeGen/SystemZ/int-const-01.ll +++ b/test/CodeGen/SystemZ/int-const-01.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare void @foo(i32, i32, i32, i32) + ; Check 0. define i32 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhi %r2, 0 ; CHECK: br %r14 ret i32 0 @@ -12,7 +14,7 @@ define i32 @f1() { ; Check the high end of the LHI range. define i32 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhi %r2, 32767 ; CHECK: br %r14 ret i32 32767 @@ -20,7 +22,7 @@ define i32 @f2() { ; Check the next value up, which must use LLILL instead. define i32 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llill %r2, 32768 ; CHECK: br %r14 ret i32 32768 @@ -28,7 +30,7 @@ define i32 @f3() { ; Check the high end of the LLILL range. define i32 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llill %r2, 65535 ; CHECK: br %r14 ret i32 65535 @@ -36,7 +38,7 @@ define i32 @f4() { ; Check the first useful LLILH value, which is the next one up. define i32 @f5() { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: llilh %r2, 1 ; CHECK: br %r14 ret i32 65536 @@ -44,7 +46,7 @@ define i32 @f5() { ; Check the first useful IILF value, which is the next one up again. define i32 @f6() { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: iilf %r2, 65537 ; CHECK: br %r14 ret i32 65537 @@ -52,7 +54,7 @@ define i32 @f6() { ; Check the high end of the LLILH range. 
define i32 @f7() { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llilh %r2, 65535 ; CHECK: br %r14 ret i32 -65536 @@ -60,7 +62,7 @@ define i32 @f7() { ; Check the next value up, which must use IILF. define i32 @f8() { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: iilf %r2, 4294901761 ; CHECK: br %r14 ret i32 -65535 @@ -68,7 +70,7 @@ define i32 @f8() { ; Check the highest useful IILF value, 0xffff7fff define i32 @f9() { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: iilf %r2, 4294934527 ; CHECK: br %r14 ret i32 -32769 @@ -76,7 +78,7 @@ define i32 @f9() { ; Check the next value up, which should use LHI. define i32 @f10() { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: lhi %r2, -32768 ; CHECK: br %r14 ret i32 -32768 @@ -84,8 +86,28 @@ define i32 @f10() { ; Check -1. define i32 @f11() { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: lhi %r2, -1 ; CHECK: br %r14 ret i32 -1 } + +; Check that constant loads are rematerialized. +define i32 @f12() { +; CHECK-LABEL: f12: +; CHECK-DAG: lhi %r2, 42 +; CHECK-DAG: llill %r3, 32768 +; CHECK-DAG: llilh %r4, 1 +; CHECK-DAG: iilf %r5, 65537 +; CHECK: brasl %r14, foo@PLT +; CHECK-DAG: lhi %r2, 42 +; CHECK-DAG: llill %r3, 32768 +; CHECK-DAG: llilh %r4, 1 +; CHECK-DAG: iilf %r5, 65537 +; CHECK: brasl %r14, foo@PLT +; CHECK: lhi %r2, 42 +; CHECK: br %r14 + call void @foo(i32 42, i32 32768, i32 65536, i32 65537) + call void @foo(i32 42, i32 32768, i32 65536, i32 65537) + ret i32 42 +} diff --git a/test/CodeGen/SystemZ/int-const-02.ll b/test/CodeGen/SystemZ/int-const-02.ll index b345e3f2a2a1a..e71abc69b3b66 100644 --- a/test/CodeGen/SystemZ/int-const-02.ll +++ b/test/CodeGen/SystemZ/int-const-02.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare void @foo(i64, i64, i64, i64) + ; Check 0. define i64 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lghi %r2, 0 ; CHECK-NEXT: br %r14 ret i64 0 @@ -12,7 +14,7 @@ define i64 @f1() { ; Check the high end of the LGHI range. 
define i64 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lghi %r2, 32767 ; CHECK-NEXT: br %r14 ret i64 32767 @@ -20,7 +22,7 @@ define i64 @f2() { ; Check the next value up, which must use LLILL instead. define i64 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llill %r2, 32768 ; CHECK-NEXT: br %r14 ret i64 32768 @@ -28,7 +30,7 @@ define i64 @f3() { ; Check the high end of the LLILL range. define i64 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llill %r2, 65535 ; CHECK-NEXT: br %r14 ret i64 65535 @@ -36,7 +38,7 @@ define i64 @f4() { ; Check the first useful LLILH value, which is the next one up. define i64 @f5() { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: llilh %r2, 1 ; CHECK-NEXT: br %r14 ret i64 65536 @@ -44,7 +46,7 @@ define i64 @f5() { ; Check the first useful LGFI value, which is the next one up again. define i64 @f6() { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lgfi %r2, 65537 ; CHECK-NEXT: br %r14 ret i64 65537 @@ -52,7 +54,7 @@ define i64 @f6() { ; Check the high end of the LGFI range. define i64 @f7() { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lgfi %r2, 2147483647 ; CHECK-NEXT: br %r14 ret i64 2147483647 @@ -60,7 +62,7 @@ define i64 @f7() { ; Check the next value up, which should use LLILH instead. define i64 @f8() { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: llilh %r2, 32768 ; CHECK-NEXT: br %r14 ret i64 2147483648 @@ -68,7 +70,7 @@ define i64 @f8() { ; Check the next value up again, which should use LLILF. define i64 @f9() { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: llilf %r2, 2147483649 ; CHECK-NEXT: br %r14 ret i64 2147483649 @@ -76,7 +78,7 @@ define i64 @f9() { ; Check the high end of the LLILH range. define i64 @f10() { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: llilh %r2, 65535 ; CHECK-NEXT: br %r14 ret i64 4294901760 @@ -84,7 +86,7 @@ define i64 @f10() { ; Check the next value up, which must use LLILF. 
define i64 @f11() { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: llilf %r2, 4294901761 ; CHECK-NEXT: br %r14 ret i64 4294901761 @@ -92,7 +94,7 @@ define i64 @f11() { ; Check the high end of the LLILF range. define i64 @f12() { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: llilf %r2, 4294967295 ; CHECK-NEXT: br %r14 ret i64 4294967295 @@ -100,7 +102,7 @@ define i64 @f12() { ; Check the lowest useful LLIHL value, which is the next one up. define i64 @f13() { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: llihl %r2, 1 ; CHECK-NEXT: br %r14 ret i64 4294967296 @@ -108,7 +110,7 @@ define i64 @f13() { ; Check the next value up, which must use a combination of two instructions. define i64 @f14() { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: llihl %r2, 1 ; CHECK-NEXT: oill %r2, 1 ; CHECK-NEXT: br %r14 @@ -117,7 +119,7 @@ define i64 @f14() { ; Check the high end of the OILL range. define i64 @f15() { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: llihl %r2, 1 ; CHECK-NEXT: oill %r2, 65535 ; CHECK-NEXT: br %r14 @@ -126,7 +128,7 @@ define i64 @f15() { ; Check the next value up, which should use OILH instead. define i64 @f16() { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: llihl %r2, 1 ; CHECK-NEXT: oilh %r2, 1 ; CHECK-NEXT: br %r14 @@ -135,7 +137,7 @@ define i64 @f16() { ; Check the next value up again, which should use OILF. define i64 @f17() { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: llihl %r2, 1 ; CHECK-NEXT: oilf %r2, 65537 ; CHECK-NEXT: br %r14 @@ -144,7 +146,7 @@ define i64 @f17() { ; Check the high end of the OILH range. define i64 @f18() { -; CHECK: f18: +; CHECK-LABEL: f18: ; CHECK: llihl %r2, 1 ; CHECK-NEXT: oilh %r2, 65535 ; CHECK-NEXT: br %r14 @@ -153,7 +155,7 @@ define i64 @f18() { ; Check the high end of the OILF range. define i64 @f19() { -; CHECK: f19: +; CHECK-LABEL: f19: ; CHECK: llihl %r2, 1 ; CHECK-NEXT: oilf %r2, 4294967295 ; CHECK-NEXT: br %r14 @@ -162,7 +164,7 @@ define i64 @f19() { ; Check the high end of the LLIHL range. 
define i64 @f20() { -; CHECK: f20: +; CHECK-LABEL: f20: ; CHECK: llihl %r2, 65535 ; CHECK-NEXT: br %r14 ret i64 281470681743360 @@ -170,7 +172,7 @@ define i64 @f20() { ; Check the lowest useful LLIHH value, which is 1<<32 greater than the above. define i64 @f21() { -; CHECK: f21: +; CHECK-LABEL: f21: ; CHECK: llihh %r2, 1 ; CHECK-NEXT: br %r14 ret i64 281474976710656 @@ -178,7 +180,7 @@ define i64 @f21() { ; Check the lowest useful LLIHF value, which is 1<<32 greater again. define i64 @f22() { -; CHECK: f22: +; CHECK-LABEL: f22: ; CHECK: llihf %r2, 65537 ; CHECK-NEXT: br %r14 ret i64 281479271677952 @@ -186,7 +188,7 @@ define i64 @f22() { ; Check the highest end of the LLIHH range. define i64 @f23() { -; CHECK: f23: +; CHECK-LABEL: f23: ; CHECK: llihh %r2, 65535 ; CHECK-NEXT: br %r14 ret i64 -281474976710656 @@ -194,7 +196,7 @@ define i64 @f23() { ; Check the next value up, which must use OILL too. define i64 @f24() { -; CHECK: f24: +; CHECK-LABEL: f24: ; CHECK: llihh %r2, 65535 ; CHECK-NEXT: oill %r2, 1 ; CHECK-NEXT: br %r14 @@ -203,7 +205,7 @@ define i64 @f24() { ; Check the high end of the LLIHF range. define i64 @f25() { -; CHECK: f25: +; CHECK-LABEL: f25: ; CHECK: llihf %r2, 4294967295 ; CHECK-NEXT: br %r14 ret i64 -4294967296 @@ -211,7 +213,7 @@ define i64 @f25() { ; Check -1. define i64 @f26() { -; CHECK: f26: +; CHECK-LABEL: f26: ; CHECK: lghi %r2, -1 ; CHECK-NEXT: br %r14 ret i64 -1 @@ -219,7 +221,7 @@ define i64 @f26() { ; Check the low end of the LGHI range. define i64 @f27() { -; CHECK: f27: +; CHECK-LABEL: f27: ; CHECK: lghi %r2, -32768 ; CHECK-NEXT: br %r14 ret i64 -32768 @@ -227,7 +229,7 @@ define i64 @f27() { ; Check the next value down, which must use LGFI instead. define i64 @f28() { -; CHECK: f28: +; CHECK-LABEL: f28: ; CHECK: lgfi %r2, -32769 ; CHECK-NEXT: br %r14 ret i64 -32769 @@ -235,7 +237,7 @@ define i64 @f28() { ; Check the low end of the LGFI range. 
define i64 @f29() { -; CHECK: f29: +; CHECK-LABEL: f29: ; CHECK: lgfi %r2, -2147483648 ; CHECK-NEXT: br %r14 ret i64 -2147483648 @@ -243,9 +245,41 @@ define i64 @f29() { ; Check the next value down, which needs a two-instruction sequence. define i64 @f30() { -; CHECK: f30: +; CHECK-LABEL: f30: ; CHECK: llihf %r2, 4294967295 ; CHECK-NEXT: oilf %r2, 2147483647 ; CHECK-NEXT: br %r14 ret i64 -2147483649 } + +; Check that constant loads are rematerialized. +define i64 @f31() { +; CHECK-LABEL: f31: +; CHECK-DAG: lghi %r2, 42 +; CHECK-DAG: lgfi %r3, 65537 +; CHECK-DAG: llilf %r4, 2147483649 +; CHECK-DAG: llihf %r5, 65537 +; CHECK: brasl %r14, foo@PLT +; CHECK-DAG: llill %r2, 32768 +; CHECK-DAG: llilh %r3, 1 +; CHECK-DAG: llihl %r4, 1 +; CHECK-DAG: llihh %r5, 1 +; CHECK: brasl %r14, foo@PLT +; CHECK-DAG: lghi %r2, 42 +; CHECK-DAG: lgfi %r3, 65537 +; CHECK-DAG: llilf %r4, 2147483649 +; CHECK-DAG: llihf %r5, 65537 +; CHECK: brasl %r14, foo@PLT +; CHECK-DAG: llill %r2, 32768 +; CHECK-DAG: llilh %r3, 1 +; CHECK-DAG: llihl %r4, 1 +; CHECK-DAG: llihh %r5, 1 +; CHECK: brasl %r14, foo@PLT +; CHECK: lghi %r2, 42 +; CHECK: br %r14 + call void @foo(i64 42, i64 65537, i64 2147483649, i64 281479271677952) + call void @foo(i64 32768, i64 65536, i64 4294967296, i64 281474976710656) + call void @foo(i64 42, i64 65537, i64 2147483649, i64 281479271677952) + call void @foo(i64 32768, i64 65536, i64 4294967296, i64 281474976710656) + ret i64 42 +} diff --git a/test/CodeGen/SystemZ/int-const-03.ll b/test/CodeGen/SystemZ/int-const-03.ll index 807b7e463ceda..af1cef2c138ad 100644 --- a/test/CodeGen/SystemZ/int-const-03.ll +++ b/test/CodeGen/SystemZ/int-const-03.ll @@ -4,7 +4,7 @@ ; Check the low end of the unsigned range. define void @f1(i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: mvi 0(%r2), 0 ; CHECK: br %r14 store i8 0, i8 *%ptr @@ -13,7 +13,7 @@ define void @f1(i8 *%ptr) { ; Check the high end of the signed range. 
define void @f2(i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mvi 0(%r2), 127 ; CHECK: br %r14 store i8 127, i8 *%ptr @@ -22,7 +22,7 @@ define void @f2(i8 *%ptr) { ; Check the next value up. define void @f3(i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: mvi 0(%r2), 128 ; CHECK: br %r14 store i8 -128, i8 *%ptr @@ -31,7 +31,7 @@ define void @f3(i8 *%ptr) { ; Check the high end of the unsigned range. define void @f4(i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: mvi 0(%r2), 255 ; CHECK: br %r14 store i8 255, i8 *%ptr @@ -40,7 +40,7 @@ define void @f4(i8 *%ptr) { ; Check -1. define void @f5(i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: mvi 0(%r2), 255 ; CHECK: br %r14 store i8 -1, i8 *%ptr @@ -49,7 +49,7 @@ define void @f5(i8 *%ptr) { ; Check the low end of the signed range. define void @f6(i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: mvi 0(%r2), 128 ; CHECK: br %r14 store i8 -128, i8 *%ptr @@ -58,7 +58,7 @@ define void @f6(i8 *%ptr) { ; Check the next value down. define void @f7(i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: mvi 0(%r2), 127 ; CHECK: br %r14 store i8 -129, i8 *%ptr @@ -67,7 +67,7 @@ define void @f7(i8 *%ptr) { ; Check the high end of the MVI range. define void @f8(i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: mvi 4095(%r2), 42 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4095 @@ -77,7 +77,7 @@ define void @f8(i8 *%src) { ; Check the next byte up, which should use MVIY instead of MVI. define void @f9(i8 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: mviy 4096(%r2), 42 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4096 @@ -87,7 +87,7 @@ define void @f9(i8 *%src) { ; Check the high end of the MVIY range. define void @f10(i8 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: mviy 524287(%r2), 42 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -98,7 +98,7 @@ define void @f10(i8 *%src) { ; Check the next byte up, which needs separate address logic. 
; Other sequences besides this one would be OK. define void @f11(i8 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r2, 524288 ; CHECK: mvi 0(%r2), 42 ; CHECK: br %r14 @@ -109,7 +109,7 @@ define void @f11(i8 *%src) { ; Check the high end of the negative MVIY range. define void @f12(i8 *%src) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: mviy -1(%r2), 42 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -119,7 +119,7 @@ define void @f12(i8 *%src) { ; Check the low end of the MVIY range. define void @f13(i8 *%src) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: mviy -524288(%r2), 42 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -130,7 +130,7 @@ define void @f13(i8 *%src) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f14(i8 *%src) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: agfi %r2, -524289 ; CHECK: mvi 0(%r2), 42 ; CHECK: br %r14 @@ -139,11 +139,11 @@ define void @f14(i8 *%src) { ret void } -; Check that MVI does not allow an index +; Check that MVI does not allow an index. We prefer STC in that case. define void @f15(i64 %src, i64 %index) { -; CHECK: f15: -; CHECK: agr %r2, %r3 -; CHECK: mvi 4095(%r2), 42 +; CHECK-LABEL: f15: +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: stc [[TMP]], 4095({{%r2,%r3|%r3,%r2}} ; CHECK: br %r14 %add1 = add i64 %src, %index %add2 = add i64 %add1, 4095 @@ -152,11 +152,11 @@ define void @f15(i64 %src, i64 %index) { ret void } -; Check that MVIY does not allow an index +; Check that MVIY does not allow an index. We prefer STCY in that case. 
define void @f16(i64 %src, i64 %index) { -; CHECK: f16: -; CHECK: agr %r2, %r3 -; CHECK: mviy 4096(%r2), 42 +; CHECK-LABEL: f16: +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: stcy [[TMP]], 4096({{%r2,%r3|%r3,%r2}} ; CHECK: br %r14 %add1 = add i64 %src, %index %add2 = add i64 %add1, 4096 diff --git a/test/CodeGen/SystemZ/int-const-04.ll b/test/CodeGen/SystemZ/int-const-04.ll index 41c7306c89aa2..aced50b5601b3 100644 --- a/test/CodeGen/SystemZ/int-const-04.ll +++ b/test/CodeGen/SystemZ/int-const-04.ll @@ -4,7 +4,7 @@ ; Check the low end of the unsigned range. define void @f1(i16 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: mvhhi 0(%r2), 0 ; CHECK: br %r14 store i16 0, i16 *%ptr @@ -13,7 +13,7 @@ define void @f1(i16 *%ptr) { ; Check the high end of the signed range. define void @f2(i16 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mvhhi 0(%r2), 32767 ; CHECK: br %r14 store i16 32767, i16 *%ptr @@ -22,7 +22,7 @@ define void @f2(i16 *%ptr) { ; Check the next value up. define void @f3(i16 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: mvhhi 0(%r2), -32768 ; CHECK: br %r14 store i16 -32768, i16 *%ptr @@ -31,7 +31,7 @@ define void @f3(i16 *%ptr) { ; Check the high end of the unsigned range. define void @f4(i16 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: mvhhi 0(%r2), -1 ; CHECK: br %r14 store i16 65535, i16 *%ptr @@ -40,7 +40,7 @@ define void @f4(i16 *%ptr) { ; Check -1. define void @f5(i16 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: mvhhi 0(%r2), -1 ; CHECK: br %r14 store i16 -1, i16 *%ptr @@ -49,7 +49,7 @@ define void @f5(i16 *%ptr) { ; Check the low end of the signed range. define void @f6(i16 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: mvhhi 0(%r2), -32768 ; CHECK: br %r14 store i16 -32768, i16 *%ptr @@ -58,7 +58,7 @@ define void @f6(i16 *%ptr) { ; Check the next value down. 
define void @f7(i16 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: mvhhi 0(%r2), 32767 ; CHECK: br %r14 store i16 -32769, i16 *%ptr @@ -67,7 +67,7 @@ define void @f7(i16 *%ptr) { ; Check the high end of the MVHHI range. define void @f8(i16 *%a) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: mvhhi 4094(%r2), 42 ; CHECK: br %r14 %ptr = getelementptr i16 *%a, i64 2047 @@ -75,34 +75,34 @@ define void @f8(i16 *%a) { ret void } -; Check the next halfword up, which needs separate address logic. -; Other sequences besides this one would be OK. +; Check the next halfword up, which is out of range. We prefer STHY +; in that case. define void @f9(i16 *%a) { -; CHECK: f9: -; CHECK: aghi %r2, 4096 -; CHECK: mvhhi 0(%r2), 42 +; CHECK-LABEL: f9: +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sthy [[TMP]], 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%a, i64 2048 store i16 42, i16 *%ptr ret void } -; Check negative displacements, which also need separate address logic. +; Check negative displacements, for which we again prefer STHY. define void @f10(i16 *%a) { -; CHECK: f10: -; CHECK: aghi %r2, -2 -; CHECK: mvhhi 0(%r2), 42 +; CHECK-LABEL: f10: +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sthy [[TMP]], -2(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%a, i64 -1 store i16 42, i16 *%ptr ret void } -; Check that MVHHI does not allow an index +; Check that MVHHI does not allow an index. define void @f11(i64 %src, i64 %index) { -; CHECK: f11: -; CHECK: agr %r2, %r3 -; CHECK: mvhhi 0(%r2), 42 +; CHECK-LABEL: f11: +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sth [[TMP]], 0({{%r2,%r3|%r3,%r2}}) ; CHECK: br %r14 %add = add i64 %src, %index %ptr = inttoptr i64 %add to i16 * diff --git a/test/CodeGen/SystemZ/int-const-05.ll b/test/CodeGen/SystemZ/int-const-05.ll index b85fd6b682075..98d6851c197d3 100644 --- a/test/CodeGen/SystemZ/int-const-05.ll +++ b/test/CodeGen/SystemZ/int-const-05.ll @@ -4,7 +4,7 @@ ; Check moves of zero. 
define void @f1(i32 *%a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: mvhi 0(%r2), 0 ; CHECK: br %r14 store i32 0, i32 *%a @@ -13,7 +13,7 @@ define void @f1(i32 *%a) { ; Check the high end of the signed 16-bit range. define void @f2(i32 *%a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mvhi 0(%r2), 32767 ; CHECK: br %r14 store i32 32767, i32 *%a @@ -22,7 +22,7 @@ define void @f2(i32 *%a) { ; Check the next value up, which can't use MVHI. define void @f3(i32 *%a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: mvhi ; CHECK: br %r14 store i32 32768, i32 *%a @@ -31,7 +31,7 @@ define void @f3(i32 *%a) { ; Check moves of -1. define void @f4(i32 *%a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: mvhi 0(%r2), -1 ; CHECK: br %r14 store i32 -1, i32 *%a @@ -40,7 +40,7 @@ define void @f4(i32 *%a) { ; Check the low end of the MVHI range. define void @f5(i32 *%a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: mvhi 0(%r2), -32768 ; CHECK: br %r14 store i32 -32768, i32 *%a @@ -49,7 +49,7 @@ define void @f5(i32 *%a) { ; Check the next value down, which can't use MVHI. define void @f6(i32 *%a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: mvhi ; CHECK: br %r14 store i32 -32769, i32 *%a @@ -58,7 +58,7 @@ define void @f6(i32 *%a) { ; Check the high end of the MVHI range. define void @f7(i32 *%a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: mvhi 4092(%r2), 42 ; CHECK: br %r14 %ptr = getelementptr i32 *%a, i64 1023 @@ -66,34 +66,33 @@ define void @f7(i32 *%a) { ret void } -; Check the next word up, which needs separate address logic. -; Other sequences besides this one would be OK. +; Check the next word up, which is out of range. We prefer STY in that case. 
define void @f8(i32 *%a) { -; CHECK: f8: -; CHECK: aghi %r2, 4096 -; CHECK: mvhi 0(%r2), 42 +; CHECK-LABEL: f8: +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sty [[TMP]], 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%a, i64 1024 store i32 42, i32 *%ptr ret void } -; Check negative displacements, which also need separate address logic. +; Check negative displacements, for which we again prefer STY. define void @f9(i32 *%a) { -; CHECK: f9: -; CHECK: aghi %r2, -4 -; CHECK: mvhi 0(%r2), 42 +; CHECK-LABEL: f9: +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sty [[TMP]], -4(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%a, i64 -1 store i32 42, i32 *%ptr ret void } -; Check that MVHI does not allow an index +; Check that MVHI does not allow an index. define void @f10(i64 %src, i64 %index) { -; CHECK: f10: -; CHECK: agr %r2, %r3 -; CHECK: mvhi 0(%r2), 42 +; CHECK-LABEL: f10: +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: st [[TMP]], 0({{%r2,%r3|%r3,%r2}}) ; CHECK: br %r14 %add = add i64 %src, %index %ptr = inttoptr i64 %add to i32 * diff --git a/test/CodeGen/SystemZ/int-const-06.ll b/test/CodeGen/SystemZ/int-const-06.ll index 9f14347cf880f..cf07c665dde78 100644 --- a/test/CodeGen/SystemZ/int-const-06.ll +++ b/test/CodeGen/SystemZ/int-const-06.ll @@ -4,7 +4,7 @@ ; Check moves of zero. define void @f1(i64 *%a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: mvghi 0(%r2), 0 ; CHECK: br %r14 store i64 0, i64 *%a @@ -13,7 +13,7 @@ define void @f1(i64 *%a) { ; Check the high end of the signed 16-bit range. define void @f2(i64 *%a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mvghi 0(%r2), 32767 ; CHECK: br %r14 store i64 32767, i64 *%a @@ -22,7 +22,7 @@ define void @f2(i64 *%a) { ; Check the next value up, which can't use MVGHI. define void @f3(i64 *%a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: mvghi ; CHECK: br %r14 store i64 32768, i64 *%a @@ -31,7 +31,7 @@ define void @f3(i64 *%a) { ; Check moves of -1. 
define void @f4(i64 *%a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: mvghi 0(%r2), -1 ; CHECK: br %r14 store i64 -1, i64 *%a @@ -40,7 +40,7 @@ define void @f4(i64 *%a) { ; Check the low end of the MVGHI range. define void @f5(i64 *%a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: mvghi 0(%r2), -32768 ; CHECK: br %r14 store i64 -32768, i64 *%a @@ -49,7 +49,7 @@ define void @f5(i64 *%a) { ; Check the next value down, which can't use MVGHI. define void @f6(i64 *%a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: mvghi ; CHECK: br %r14 store i64 -32769, i64 *%a @@ -58,7 +58,7 @@ define void @f6(i64 *%a) { ; Check the high end of the MVGHI range. define void @f7(i64 *%a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: mvghi 4088(%r2), 42 ; CHECK: br %r14 %ptr = getelementptr i64 *%a, i64 511 @@ -66,34 +66,34 @@ define void @f7(i64 *%a) { ret void } -; Check the next doubleword up, which needs separate address logic. -; Other sequences besides this one would be OK. +; Check the next doubleword up, which is out of range. We prefer STG +; in that case. define void @f8(i64 *%a) { -; CHECK: f8: -; CHECK: aghi %r2, 4096 -; CHECK: mvghi 0(%r2), 42 +; CHECK-LABEL: f8: +; CHECK: lghi [[TMP:%r[0-5]]], 42 +; CHECK: stg [[TMP]], 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%a, i64 512 store i64 42, i64 *%ptr ret void } -; Check negative displacements, which also need separate address logic. +; Check negative displacements, for which we again prefer STG. define void @f9(i64 *%a) { -; CHECK: f9: -; CHECK: aghi %r2, -8 -; CHECK: mvghi 0(%r2), 42 +; CHECK-LABEL: f9: +; CHECK: lghi [[TMP:%r[0-5]]], 42 +; CHECK: stg [[TMP]], -8(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%a, i64 -1 store i64 42, i64 *%ptr ret void } -; Check that MVGHI does not allow an index +; Check that MVGHI does not allow an index. 
define void @f10(i64 %src, i64 %index) { -; CHECK: f10: -; CHECK: agr %r2, %r3 -; CHECK: mvghi 0(%r2), 42 +; CHECK-LABEL: f10: +; CHECK: lghi [[TMP:%r[0-5]]], 42 +; CHECK: stg [[TMP]], 0({{%r2,%r3|%r3,%r2}}) ; CHECK: br %r14 %add = add i64 %src, %index %ptr = inttoptr i64 %add to i64 * diff --git a/test/CodeGen/SystemZ/int-conv-01.ll b/test/CodeGen/SystemZ/int-conv-01.ll index 643ac6ae2510f..e5c411cdec1d0 100644 --- a/test/CodeGen/SystemZ/int-conv-01.ll +++ b/test/CodeGen/SystemZ/int-conv-01.ll @@ -4,9 +4,9 @@ ; Test register extension, starting with an i32. define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lbr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %byte = trunc i32 %a to i8 %ext = sext i8 %byte to i32 ret i32 %ext @@ -14,9 +14,9 @@ define i32 @f1(i32 %a) { ; ...and again with an i64. define i32 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lbr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %byte = trunc i64 %a to i8 %ext = sext i8 %byte to i32 ret i32 %ext @@ -24,7 +24,7 @@ define i32 @f2(i64 %a) { ; Check LB with no displacement. define i32 @f3(i8 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lb %r2, 0(%r2) ; CHECK: br %r14 %byte = load i8 *%src @@ -34,7 +34,7 @@ define i32 @f3(i8 *%src) { ; Check the high end of the LB range. define i32 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lb %r2, 524287(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -46,7 +46,7 @@ define i32 @f4(i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r2, 524288 ; CHECK: lb %r2, 0(%r2) ; CHECK: br %r14 @@ -58,7 +58,7 @@ define i32 @f5(i8 *%src) { ; Check the high end of the negative LB range. 
define i32 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lb %r2, -1(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -69,7 +69,7 @@ define i32 @f6(i8 *%src) { ; Check the low end of the LB range. define i32 @f7(i8 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lb %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -81,7 +81,7 @@ define i32 @f7(i8 *%src) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f8(i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, -524289 ; CHECK: lb %r2, 0(%r2) ; CHECK: br %r14 @@ -93,7 +93,7 @@ define i32 @f8(i8 *%src) { ; Check that LB allows an index define i32 @f9(i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: lb %r2, 524287(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -103,3 +103,97 @@ define i32 @f9(i64 %src, i64 %index) { %ext = sext i8 %byte to i32 ret i32 %ext } + +; Test a case where we spill the source of at least one LBR. We want +; to use LB if possible. 
+define void @f10(i32 *%ptr) { +; CHECK-LABEL: f10: +; CHECK: lb {{%r[0-9]+}}, 16{{[37]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + + %trunc0 = trunc i32 %val0 to i8 + %trunc1 = trunc i32 %val1 to i8 + %trunc2 = trunc i32 %val2 to i8 + %trunc3 = trunc i32 %val3 to i8 + %trunc4 = trunc i32 %val4 to i8 + %trunc5 = trunc i32 %val5 to i8 + %trunc6 = trunc i32 %val6 to i8 + %trunc7 = trunc i32 %val7 to i8 + %trunc8 = trunc i32 %val8 to i8 + %trunc9 = trunc i32 %val9 to i8 + %trunc10 = trunc i32 %val10 to i8 + %trunc11 = trunc i32 %val11 to i8 + %trunc12 = trunc i32 %val12 to i8 + %trunc13 = trunc i32 %val13 to i8 + %trunc14 = trunc i32 %val14 to i8 + %trunc15 = trunc i32 %val15 to i8 + + %ext0 = sext i8 %trunc0 to i32 + %ext1 = sext i8 %trunc1 to i32 + %ext2 = sext i8 %trunc2 to i32 + %ext3 = sext i8 %trunc3 to i32 + %ext4 = sext i8 %trunc4 to i32 + %ext5 = sext i8 %trunc5 to i32 + %ext6 = sext i8 %trunc6 to i32 + %ext7 = sext i8 %trunc7 to i32 + %ext8 = sext i8 %trunc8 to i32 + %ext9 = sext i8 %trunc9 to i32 + %ext10 = sext i8 %trunc10 to i32 + %ext11 = sext i8 %trunc11 to i32 + %ext12 = sext i8 %trunc12 to i32 + %ext13 = sext i8 %trunc13 to i32 + %ext14 = sext i8 %trunc14 to i32 + %ext15 = sext i8 %trunc15 to i32 + + store volatile i32 %val0, i32 *%ptr + store volatile i32 %val1, i32 *%ptr + store volatile i32 %val2, i32 *%ptr + store volatile i32 %val3, i32 *%ptr + store volatile i32 %val4, i32 *%ptr + store volatile i32 %val5, i32 *%ptr + 
store volatile i32 %val6, i32 *%ptr + store volatile i32 %val7, i32 *%ptr + store volatile i32 %val8, i32 *%ptr + store volatile i32 %val9, i32 *%ptr + store volatile i32 %val10, i32 *%ptr + store volatile i32 %val11, i32 *%ptr + store volatile i32 %val12, i32 *%ptr + store volatile i32 %val13, i32 *%ptr + store volatile i32 %val14, i32 *%ptr + store volatile i32 %val15, i32 *%ptr + + store volatile i32 %ext0, i32 *%ptr + store volatile i32 %ext1, i32 *%ptr + store volatile i32 %ext2, i32 *%ptr + store volatile i32 %ext3, i32 *%ptr + store volatile i32 %ext4, i32 *%ptr + store volatile i32 %ext5, i32 *%ptr + store volatile i32 %ext6, i32 *%ptr + store volatile i32 %ext7, i32 *%ptr + store volatile i32 %ext8, i32 *%ptr + store volatile i32 %ext9, i32 *%ptr + store volatile i32 %ext10, i32 *%ptr + store volatile i32 %ext11, i32 *%ptr + store volatile i32 %ext12, i32 *%ptr + store volatile i32 %ext13, i32 *%ptr + store volatile i32 %ext14, i32 *%ptr + store volatile i32 %ext15, i32 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-conv-02.ll b/test/CodeGen/SystemZ/int-conv-02.ll index 86144d3e64505..dd7760d08cf53 100644 --- a/test/CodeGen/SystemZ/int-conv-02.ll +++ b/test/CodeGen/SystemZ/int-conv-02.ll @@ -1,12 +1,13 @@ -; Test zero extensions from a byte to an i32. +; Test zero extensions from a byte to an i32. The tests here +; assume z10 register pressure, without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Test register extension, starting with an i32. define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: llcr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %byte = trunc i32 %a to i8 %ext = zext i8 %byte to i32 ret i32 %ext @@ -14,9 +15,9 @@ define i32 @f1(i32 %a) { ; ...and again with an i64. 
define i32 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: llcr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %byte = trunc i64 %a to i8 %ext = zext i8 %byte to i32 ret i32 %ext @@ -24,16 +25,16 @@ define i32 @f2(i64 %a) { ; Check ANDs that are equivalent to zero extension. define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llcr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %ext = and i32 %a, 255 ret i32 %ext } ; Check LLC with no displacement. define i32 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llc %r2, 0(%r2) ; CHECK: br %r14 %byte = load i8 *%src @@ -43,7 +44,7 @@ define i32 @f4(i8 *%src) { ; Check the high end of the LLC range. define i32 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: llc %r2, 524287(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -55,7 +56,7 @@ define i32 @f5(i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, 524288 ; CHECK: llc %r2, 0(%r2) ; CHECK: br %r14 @@ -67,7 +68,7 @@ define i32 @f6(i8 *%src) { ; Check the high end of the negative LLC range. define i32 @f7(i8 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llc %r2, -1(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -78,7 +79,7 @@ define i32 @f7(i8 *%src) { ; Check the low end of the LLC range. define i32 @f8(i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: llc %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -90,7 +91,7 @@ define i32 @f8(i8 *%src) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i32 @f9(i8 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r2, -524289 ; CHECK: llc %r2, 0(%r2) ; CHECK: br %r14 @@ -102,7 +103,7 @@ define i32 @f9(i8 *%src) { ; Check that LLC allows an index define i32 @f10(i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: llc %r2, 524287(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -112,3 +113,97 @@ define i32 @f10(i64 %src, i64 %index) { %ext = zext i8 %byte to i32 ret i32 %ext } + +; Test a case where we spill the source of at least one LLCR. We want +; to use LLC if possible. +define void @f11(i32 *%ptr) { +; CHECK-LABEL: f11: +; CHECK: llc {{%r[0-9]+}}, 16{{[37]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + + %trunc0 = trunc i32 %val0 to i8 + %trunc1 = trunc i32 %val1 to i8 + %trunc2 = trunc i32 %val2 to i8 + %trunc3 = trunc i32 %val3 to i8 + %trunc4 = trunc i32 %val4 to i8 + %trunc5 = trunc i32 %val5 to i8 + %trunc6 = trunc i32 %val6 to i8 + %trunc7 = trunc i32 %val7 to i8 + %trunc8 = trunc i32 %val8 to i8 + %trunc9 = trunc i32 %val9 to i8 + %trunc10 = trunc i32 %val10 to i8 + %trunc11 = trunc i32 %val11 to i8 + %trunc12 = trunc i32 %val12 to i8 + %trunc13 = trunc i32 %val13 to i8 + %trunc14 = trunc i32 %val14 to i8 + %trunc15 = trunc i32 %val15 to i8 + + %ext0 = zext i8 %trunc0 to i32 + %ext1 = zext i8 %trunc1 to i32 + %ext2 = zext i8 %trunc2 to i32 + %ext3 = zext i8 %trunc3 to i32 + %ext4 = zext i8 %trunc4 to i32 + %ext5 = zext i8 %trunc5 to i32 + 
%ext6 = zext i8 %trunc6 to i32 + %ext7 = zext i8 %trunc7 to i32 + %ext8 = zext i8 %trunc8 to i32 + %ext9 = zext i8 %trunc9 to i32 + %ext10 = zext i8 %trunc10 to i32 + %ext11 = zext i8 %trunc11 to i32 + %ext12 = zext i8 %trunc12 to i32 + %ext13 = zext i8 %trunc13 to i32 + %ext14 = zext i8 %trunc14 to i32 + %ext15 = zext i8 %trunc15 to i32 + + store volatile i32 %val0, i32 *%ptr + store volatile i32 %val1, i32 *%ptr + store volatile i32 %val2, i32 *%ptr + store volatile i32 %val3, i32 *%ptr + store volatile i32 %val4, i32 *%ptr + store volatile i32 %val5, i32 *%ptr + store volatile i32 %val6, i32 *%ptr + store volatile i32 %val7, i32 *%ptr + store volatile i32 %val8, i32 *%ptr + store volatile i32 %val9, i32 *%ptr + store volatile i32 %val10, i32 *%ptr + store volatile i32 %val11, i32 *%ptr + store volatile i32 %val12, i32 *%ptr + store volatile i32 %val13, i32 *%ptr + store volatile i32 %val14, i32 *%ptr + store volatile i32 %val15, i32 *%ptr + + store volatile i32 %ext0, i32 *%ptr + store volatile i32 %ext1, i32 *%ptr + store volatile i32 %ext2, i32 *%ptr + store volatile i32 %ext3, i32 *%ptr + store volatile i32 %ext4, i32 *%ptr + store volatile i32 %ext5, i32 *%ptr + store volatile i32 %ext6, i32 *%ptr + store volatile i32 %ext7, i32 *%ptr + store volatile i32 %ext8, i32 *%ptr + store volatile i32 %ext9, i32 *%ptr + store volatile i32 %ext10, i32 *%ptr + store volatile i32 %ext11, i32 *%ptr + store volatile i32 %ext12, i32 *%ptr + store volatile i32 %ext13, i32 *%ptr + store volatile i32 %ext14, i32 *%ptr + store volatile i32 %ext15, i32 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-conv-03.ll b/test/CodeGen/SystemZ/int-conv-03.ll index 73b8dbb43a139..cad9581296a4c 100644 --- a/test/CodeGen/SystemZ/int-conv-03.ll +++ b/test/CodeGen/SystemZ/int-conv-03.ll @@ -4,9 +4,9 @@ ; Test register extension, starting with an i32. 
define i64 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lgbr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %byte = trunc i32 %a to i8 %ext = sext i8 %byte to i64 ret i64 %ext @@ -14,9 +14,9 @@ define i64 @f1(i32 %a) { ; ...and again with an i64. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lgbr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %byte = trunc i64 %a to i8 %ext = sext i8 %byte to i64 ret i64 %ext @@ -24,7 +24,7 @@ define i64 @f2(i64 %a) { ; Check LGB with no displacement. define i64 @f3(i8 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lgb %r2, 0(%r2) ; CHECK: br %r14 %byte = load i8 *%src @@ -34,7 +34,7 @@ define i64 @f3(i8 *%src) { ; Check the high end of the LGB range. define i64 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lgb %r2, 524287(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -46,7 +46,7 @@ define i64 @f4(i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r2, 524288 ; CHECK: lgb %r2, 0(%r2) ; CHECK: br %r14 @@ -58,7 +58,7 @@ define i64 @f5(i8 *%src) { ; Check the high end of the negative LGB range. define i64 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lgb %r2, -1(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -69,7 +69,7 @@ define i64 @f6(i8 *%src) { ; Check the low end of the LGB range. define i64 @f7(i8 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lgb %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -81,7 +81,7 @@ define i64 @f7(i8 *%src) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i64 @f8(i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, -524289 ; CHECK: lgb %r2, 0(%r2) ; CHECK: br %r14 @@ -93,7 +93,7 @@ define i64 @f8(i8 *%src) { ; Check that LGB allows an index define i64 @f9(i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: lgb %r2, 524287(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -103,3 +103,97 @@ define i64 @f9(i64 %src, i64 %index) { %ext = sext i8 %byte to i64 ret i64 %ext } + +; Test a case where we spill the source of at least one LGBR. We want +; to use LGB if possible. +define void @f10(i64 *%ptr) { +; CHECK-LABEL: f10: +; CHECK: lgb {{%r[0-9]+}}, 167(%r15) +; CHECK: br %r14 + %val0 = load volatile i64 *%ptr + %val1 = load volatile i64 *%ptr + %val2 = load volatile i64 *%ptr + %val3 = load volatile i64 *%ptr + %val4 = load volatile i64 *%ptr + %val5 = load volatile i64 *%ptr + %val6 = load volatile i64 *%ptr + %val7 = load volatile i64 *%ptr + %val8 = load volatile i64 *%ptr + %val9 = load volatile i64 *%ptr + %val10 = load volatile i64 *%ptr + %val11 = load volatile i64 *%ptr + %val12 = load volatile i64 *%ptr + %val13 = load volatile i64 *%ptr + %val14 = load volatile i64 *%ptr + %val15 = load volatile i64 *%ptr + + %trunc0 = trunc i64 %val0 to i8 + %trunc1 = trunc i64 %val1 to i8 + %trunc2 = trunc i64 %val2 to i8 + %trunc3 = trunc i64 %val3 to i8 + %trunc4 = trunc i64 %val4 to i8 + %trunc5 = trunc i64 %val5 to i8 + %trunc6 = trunc i64 %val6 to i8 + %trunc7 = trunc i64 %val7 to i8 + %trunc8 = trunc i64 %val8 to i8 + %trunc9 = trunc i64 %val9 to i8 + %trunc10 = trunc i64 %val10 to i8 + %trunc11 = trunc i64 %val11 to i8 + %trunc12 = trunc i64 %val12 to i8 + %trunc13 = trunc i64 %val13 to i8 + %trunc14 = trunc i64 %val14 to i8 + %trunc15 = trunc i64 %val15 to i8 + + %ext0 = sext i8 %trunc0 to i64 + %ext1 = sext i8 %trunc1 to i64 + %ext2 = sext i8 %trunc2 to i64 + %ext3 = sext i8 %trunc3 to i64 + %ext4 = sext i8 %trunc4 to i64 + %ext5 = sext i8 %trunc5 to i64 + %ext6 = sext i8 
%trunc6 to i64 + %ext7 = sext i8 %trunc7 to i64 + %ext8 = sext i8 %trunc8 to i64 + %ext9 = sext i8 %trunc9 to i64 + %ext10 = sext i8 %trunc10 to i64 + %ext11 = sext i8 %trunc11 to i64 + %ext12 = sext i8 %trunc12 to i64 + %ext13 = sext i8 %trunc13 to i64 + %ext14 = sext i8 %trunc14 to i64 + %ext15 = sext i8 %trunc15 to i64 + + store volatile i64 %val0, i64 *%ptr + store volatile i64 %val1, i64 *%ptr + store volatile i64 %val2, i64 *%ptr + store volatile i64 %val3, i64 *%ptr + store volatile i64 %val4, i64 *%ptr + store volatile i64 %val5, i64 *%ptr + store volatile i64 %val6, i64 *%ptr + store volatile i64 %val7, i64 *%ptr + store volatile i64 %val8, i64 *%ptr + store volatile i64 %val9, i64 *%ptr + store volatile i64 %val10, i64 *%ptr + store volatile i64 %val11, i64 *%ptr + store volatile i64 %val12, i64 *%ptr + store volatile i64 %val13, i64 *%ptr + store volatile i64 %val14, i64 *%ptr + store volatile i64 %val15, i64 *%ptr + + store volatile i64 %ext0, i64 *%ptr + store volatile i64 %ext1, i64 *%ptr + store volatile i64 %ext2, i64 *%ptr + store volatile i64 %ext3, i64 *%ptr + store volatile i64 %ext4, i64 *%ptr + store volatile i64 %ext5, i64 *%ptr + store volatile i64 %ext6, i64 *%ptr + store volatile i64 %ext7, i64 *%ptr + store volatile i64 %ext8, i64 *%ptr + store volatile i64 %ext9, i64 *%ptr + store volatile i64 %ext10, i64 *%ptr + store volatile i64 %ext11, i64 *%ptr + store volatile i64 %ext12, i64 *%ptr + store volatile i64 %ext13, i64 *%ptr + store volatile i64 %ext14, i64 *%ptr + store volatile i64 %ext15, i64 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-conv-04.ll b/test/CodeGen/SystemZ/int-conv-04.ll index 4cec5242e880c..1c6be7b6e8a43 100644 --- a/test/CodeGen/SystemZ/int-conv-04.ll +++ b/test/CodeGen/SystemZ/int-conv-04.ll @@ -4,9 +4,9 @@ ; Test register extension, starting with an i32. 
define i64 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: llgcr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %byte = trunc i32 %a to i8 %ext = zext i8 %byte to i64 ret i64 %ext @@ -14,9 +14,9 @@ define i64 @f1(i32 %a) { ; ...and again with an i64. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: llgcr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %byte = trunc i64 %a to i8 %ext = zext i8 %byte to i64 ret i64 %ext @@ -24,16 +24,16 @@ define i64 @f2(i64 %a) { ; Check ANDs that are equivalent to zero extension. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llgcr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %ext = and i64 %a, 255 ret i64 %ext } ; Check LLGC with no displacement. define i64 @f4(i8 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llgc %r2, 0(%r2) ; CHECK: br %r14 %byte = load i8 *%src @@ -43,7 +43,7 @@ define i64 @f4(i8 *%src) { ; Check the high end of the LLGC range. define i64 @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: llgc %r2, 524287(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -55,7 +55,7 @@ define i64 @f5(i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, 524288 ; CHECK: llgc %r2, 0(%r2) ; CHECK: br %r14 @@ -67,7 +67,7 @@ define i64 @f6(i8 *%src) { ; Check the high end of the negative LLGC range. define i64 @f7(i8 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llgc %r2, -1(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -78,7 +78,7 @@ define i64 @f7(i8 *%src) { ; Check the low end of the LLGC range. define i64 @f8(i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: llgc %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -90,7 +90,7 @@ define i64 @f8(i8 *%src) { ; Check the next byte down, which needs separate address logic. 
; Other sequences besides this one would be OK. define i64 @f9(i8 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r2, -524289 ; CHECK: llgc %r2, 0(%r2) ; CHECK: br %r14 @@ -102,7 +102,7 @@ define i64 @f9(i8 *%src) { ; Check that LLGC allows an index define i64 @f10(i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: llgc %r2, 524287(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -112,3 +112,97 @@ define i64 @f10(i64 %src, i64 %index) { %ext = zext i8 %byte to i64 ret i64 %ext } + +; Test a case where we spill the source of at least one LLGCR. We want +; to use LLGC if possible. +define void @f11(i64 *%ptr) { +; CHECK-LABEL: f11: +; CHECK: llgc {{%r[0-9]+}}, 167(%r15) +; CHECK: br %r14 + %val0 = load volatile i64 *%ptr + %val1 = load volatile i64 *%ptr + %val2 = load volatile i64 *%ptr + %val3 = load volatile i64 *%ptr + %val4 = load volatile i64 *%ptr + %val5 = load volatile i64 *%ptr + %val6 = load volatile i64 *%ptr + %val7 = load volatile i64 *%ptr + %val8 = load volatile i64 *%ptr + %val9 = load volatile i64 *%ptr + %val10 = load volatile i64 *%ptr + %val11 = load volatile i64 *%ptr + %val12 = load volatile i64 *%ptr + %val13 = load volatile i64 *%ptr + %val14 = load volatile i64 *%ptr + %val15 = load volatile i64 *%ptr + + %trunc0 = trunc i64 %val0 to i8 + %trunc1 = trunc i64 %val1 to i8 + %trunc2 = trunc i64 %val2 to i8 + %trunc3 = trunc i64 %val3 to i8 + %trunc4 = trunc i64 %val4 to i8 + %trunc5 = trunc i64 %val5 to i8 + %trunc6 = trunc i64 %val6 to i8 + %trunc7 = trunc i64 %val7 to i8 + %trunc8 = trunc i64 %val8 to i8 + %trunc9 = trunc i64 %val9 to i8 + %trunc10 = trunc i64 %val10 to i8 + %trunc11 = trunc i64 %val11 to i8 + %trunc12 = trunc i64 %val12 to i8 + %trunc13 = trunc i64 %val13 to i8 + %trunc14 = trunc i64 %val14 to i8 + %trunc15 = trunc i64 %val15 to i8 + + %ext0 = zext i8 %trunc0 to i64 + %ext1 = zext i8 %trunc1 to i64 + %ext2 = zext i8 %trunc2 to i64 + %ext3 = zext i8 %trunc3 to i64 + %ext4 = zext i8 %trunc4 
to i64 + %ext5 = zext i8 %trunc5 to i64 + %ext6 = zext i8 %trunc6 to i64 + %ext7 = zext i8 %trunc7 to i64 + %ext8 = zext i8 %trunc8 to i64 + %ext9 = zext i8 %trunc9 to i64 + %ext10 = zext i8 %trunc10 to i64 + %ext11 = zext i8 %trunc11 to i64 + %ext12 = zext i8 %trunc12 to i64 + %ext13 = zext i8 %trunc13 to i64 + %ext14 = zext i8 %trunc14 to i64 + %ext15 = zext i8 %trunc15 to i64 + + store volatile i64 %val0, i64 *%ptr + store volatile i64 %val1, i64 *%ptr + store volatile i64 %val2, i64 *%ptr + store volatile i64 %val3, i64 *%ptr + store volatile i64 %val4, i64 *%ptr + store volatile i64 %val5, i64 *%ptr + store volatile i64 %val6, i64 *%ptr + store volatile i64 %val7, i64 *%ptr + store volatile i64 %val8, i64 *%ptr + store volatile i64 %val9, i64 *%ptr + store volatile i64 %val10, i64 *%ptr + store volatile i64 %val11, i64 *%ptr + store volatile i64 %val12, i64 *%ptr + store volatile i64 %val13, i64 *%ptr + store volatile i64 %val14, i64 *%ptr + store volatile i64 %val15, i64 *%ptr + + store volatile i64 %ext0, i64 *%ptr + store volatile i64 %ext1, i64 *%ptr + store volatile i64 %ext2, i64 *%ptr + store volatile i64 %ext3, i64 *%ptr + store volatile i64 %ext4, i64 *%ptr + store volatile i64 %ext5, i64 *%ptr + store volatile i64 %ext6, i64 *%ptr + store volatile i64 %ext7, i64 *%ptr + store volatile i64 %ext8, i64 *%ptr + store volatile i64 %ext9, i64 *%ptr + store volatile i64 %ext10, i64 *%ptr + store volatile i64 %ext11, i64 *%ptr + store volatile i64 %ext12, i64 *%ptr + store volatile i64 %ext13, i64 *%ptr + store volatile i64 %ext14, i64 *%ptr + store volatile i64 %ext15, i64 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-conv-05.ll b/test/CodeGen/SystemZ/int-conv-05.ll index 5358f7d9228a8..5eade93ac584c 100644 --- a/test/CodeGen/SystemZ/int-conv-05.ll +++ b/test/CodeGen/SystemZ/int-conv-05.ll @@ -4,9 +4,9 @@ ; Test register extension, starting with an i32. 
define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %half = trunc i32 %a to i16 %ext = sext i16 %half to i32 ret i32 %ext @@ -14,9 +14,9 @@ define i32 @f1(i32 %a) { ; ...and again with an i64. define i32 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lhr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %half = trunc i64 %a to i16 %ext = sext i16 %half to i32 ret i32 %ext @@ -24,7 +24,7 @@ define i32 @f2(i64 %a) { ; Check the low end of the LH range. define i32 @f3(i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lh %r2, 0(%r2) ; CHECK: br %r14 %half = load i16 *%src @@ -34,7 +34,7 @@ define i32 @f3(i16 *%src) { ; Check the high end of the LH range. define i32 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lh %r2, 4094(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 2047 @@ -45,7 +45,7 @@ define i32 @f4(i16 *%src) { ; Check the next halfword up, which needs LHY rather than LH. define i32 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lhy %r2, 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 2048 @@ -56,7 +56,7 @@ define i32 @f5(i16 *%src) { ; Check the high end of the LHY range. define i32 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lhy %r2, 524286(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 262143 @@ -68,7 +68,7 @@ define i32 @f6(i16 *%src) { ; Check the next halfword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f7(i16 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r2, 524288 ; CHECK: lh %r2, 0(%r2) ; CHECK: br %r14 @@ -80,7 +80,7 @@ define i32 @f7(i16 *%src) { ; Check the high end of the negative LHY range. define i32 @f8(i16 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: lhy %r2, -2(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -1 @@ -91,7 +91,7 @@ define i32 @f8(i16 *%src) { ; Check the low end of the LHY range. 
define i32 @f9(i16 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: lhy %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -262144 @@ -103,7 +103,7 @@ define i32 @f9(i16 *%src) { ; Check the next halfword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f10(i16 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r2, -524290 ; CHECK: lh %r2, 0(%r2) ; CHECK: br %r14 @@ -115,7 +115,7 @@ define i32 @f10(i16 *%src) { ; Check that LH allows an index define i32 @f11(i64 %src, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: lh %r2, 4094(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -128,7 +128,7 @@ define i32 @f11(i64 %src, i64 %index) { ; Check that LH allows an index define i32 @f12(i64 %src, i64 %index) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: lhy %r2, 4096(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -138,3 +138,97 @@ define i32 @f12(i64 %src, i64 %index) { %ext = sext i16 %half to i32 ret i32 %ext } + +; Test a case where we spill the source of at least one LHR. We want +; to use LH if possible. 
+define void @f13(i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: lh {{%r[0-9]+}}, 16{{[26]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + + %trunc0 = trunc i32 %val0 to i16 + %trunc1 = trunc i32 %val1 to i16 + %trunc2 = trunc i32 %val2 to i16 + %trunc3 = trunc i32 %val3 to i16 + %trunc4 = trunc i32 %val4 to i16 + %trunc5 = trunc i32 %val5 to i16 + %trunc6 = trunc i32 %val6 to i16 + %trunc7 = trunc i32 %val7 to i16 + %trunc8 = trunc i32 %val8 to i16 + %trunc9 = trunc i32 %val9 to i16 + %trunc10 = trunc i32 %val10 to i16 + %trunc11 = trunc i32 %val11 to i16 + %trunc12 = trunc i32 %val12 to i16 + %trunc13 = trunc i32 %val13 to i16 + %trunc14 = trunc i32 %val14 to i16 + %trunc15 = trunc i32 %val15 to i16 + + %ext0 = sext i16 %trunc0 to i32 + %ext1 = sext i16 %trunc1 to i32 + %ext2 = sext i16 %trunc2 to i32 + %ext3 = sext i16 %trunc3 to i32 + %ext4 = sext i16 %trunc4 to i32 + %ext5 = sext i16 %trunc5 to i32 + %ext6 = sext i16 %trunc6 to i32 + %ext7 = sext i16 %trunc7 to i32 + %ext8 = sext i16 %trunc8 to i32 + %ext9 = sext i16 %trunc9 to i32 + %ext10 = sext i16 %trunc10 to i32 + %ext11 = sext i16 %trunc11 to i32 + %ext12 = sext i16 %trunc12 to i32 + %ext13 = sext i16 %trunc13 to i32 + %ext14 = sext i16 %trunc14 to i32 + %ext15 = sext i16 %trunc15 to i32 + + store volatile i32 %val0, i32 *%ptr + store volatile i32 %val1, i32 *%ptr + store volatile i32 %val2, i32 *%ptr + store volatile i32 %val3, i32 *%ptr + store volatile i32 %val4, i32 *%ptr + store 
volatile i32 %val5, i32 *%ptr + store volatile i32 %val6, i32 *%ptr + store volatile i32 %val7, i32 *%ptr + store volatile i32 %val8, i32 *%ptr + store volatile i32 %val9, i32 *%ptr + store volatile i32 %val10, i32 *%ptr + store volatile i32 %val11, i32 *%ptr + store volatile i32 %val12, i32 *%ptr + store volatile i32 %val13, i32 *%ptr + store volatile i32 %val14, i32 *%ptr + store volatile i32 %val15, i32 *%ptr + + store volatile i32 %ext0, i32 *%ptr + store volatile i32 %ext1, i32 *%ptr + store volatile i32 %ext2, i32 *%ptr + store volatile i32 %ext3, i32 *%ptr + store volatile i32 %ext4, i32 *%ptr + store volatile i32 %ext5, i32 *%ptr + store volatile i32 %ext6, i32 *%ptr + store volatile i32 %ext7, i32 *%ptr + store volatile i32 %ext8, i32 *%ptr + store volatile i32 %ext9, i32 *%ptr + store volatile i32 %ext10, i32 *%ptr + store volatile i32 %ext11, i32 *%ptr + store volatile i32 %ext12, i32 *%ptr + store volatile i32 %ext13, i32 *%ptr + store volatile i32 %ext14, i32 *%ptr + store volatile i32 %ext15, i32 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-conv-06.ll b/test/CodeGen/SystemZ/int-conv-06.ll index 64af612d65f52..33860d12270fd 100644 --- a/test/CodeGen/SystemZ/int-conv-06.ll +++ b/test/CodeGen/SystemZ/int-conv-06.ll @@ -1,12 +1,13 @@ -; Test zero extensions from a halfword to an i32. +; Test zero extensions from a halfword to an i32. The tests here +; assume z10 register pressure, without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Test register extension, starting with an i32. define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: llhr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %half = trunc i32 %a to i16 %ext = zext i16 %half to i32 ret i32 %ext @@ -14,9 +15,9 @@ define i32 @f1(i32 %a) { ; ...and again with an i64. 
define i32 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: llhr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %half = trunc i64 %a to i16 %ext = zext i16 %half to i32 ret i32 %ext @@ -24,16 +25,16 @@ define i32 @f2(i64 %a) { ; Check ANDs that are equivalent to zero extension. define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llhr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %ext = and i32 %a, 65535 ret i32 %ext } ; Check LLH with no displacement. define i32 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llh %r2, 0(%r2) ; CHECK: br %r14 %half = load i16 *%src @@ -43,7 +44,7 @@ define i32 @f4(i16 *%src) { ; Check the high end of the LLH range. define i32 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: llh %r2, 524286(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 262143 @@ -55,7 +56,7 @@ define i32 @f5(i16 *%src) { ; Check the next halfword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, 524288 ; CHECK: llh %r2, 0(%r2) ; CHECK: br %r14 @@ -67,7 +68,7 @@ define i32 @f6(i16 *%src) { ; Check the high end of the negative LLH range. define i32 @f7(i16 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llh %r2, -2(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -1 @@ -78,7 +79,7 @@ define i32 @f7(i16 *%src) { ; Check the low end of the LLH range. define i32 @f8(i16 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: llh %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -262144 @@ -90,7 +91,7 @@ define i32 @f8(i16 *%src) { ; Check the next halfword down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i32 @f9(i16 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r2, -524290 ; CHECK: llh %r2, 0(%r2) ; CHECK: br %r14 @@ -102,7 +103,7 @@ define i32 @f9(i16 *%src) { ; Check that LLH allows an index define i32 @f10(i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: llh %r2, 524287(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -112,3 +113,97 @@ define i32 @f10(i64 %src, i64 %index) { %ext = zext i16 %half to i32 ret i32 %ext } + +; Test a case where we spill the source of at least one LLHR. We want +; to use LLH if possible. +define void @f11(i32 *%ptr) { +; CHECK-LABEL: f11: +; CHECK: llh {{%r[0-9]+}}, 16{{[26]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + + %trunc0 = trunc i32 %val0 to i16 + %trunc1 = trunc i32 %val1 to i16 + %trunc2 = trunc i32 %val2 to i16 + %trunc3 = trunc i32 %val3 to i16 + %trunc4 = trunc i32 %val4 to i16 + %trunc5 = trunc i32 %val5 to i16 + %trunc6 = trunc i32 %val6 to i16 + %trunc7 = trunc i32 %val7 to i16 + %trunc8 = trunc i32 %val8 to i16 + %trunc9 = trunc i32 %val9 to i16 + %trunc10 = trunc i32 %val10 to i16 + %trunc11 = trunc i32 %val11 to i16 + %trunc12 = trunc i32 %val12 to i16 + %trunc13 = trunc i32 %val13 to i16 + %trunc14 = trunc i32 %val14 to i16 + %trunc15 = trunc i32 %val15 to i16 + + %ext0 = zext i16 %trunc0 to i32 + %ext1 = zext i16 %trunc1 to i32 + %ext2 = zext i16 %trunc2 to i32 + %ext3 = zext i16 %trunc3 to i32 + %ext4 = zext i16 %trunc4 to i32 + %ext5 = zext 
i16 %trunc5 to i32 + %ext6 = zext i16 %trunc6 to i32 + %ext7 = zext i16 %trunc7 to i32 + %ext8 = zext i16 %trunc8 to i32 + %ext9 = zext i16 %trunc9 to i32 + %ext10 = zext i16 %trunc10 to i32 + %ext11 = zext i16 %trunc11 to i32 + %ext12 = zext i16 %trunc12 to i32 + %ext13 = zext i16 %trunc13 to i32 + %ext14 = zext i16 %trunc14 to i32 + %ext15 = zext i16 %trunc15 to i32 + + store volatile i32 %val0, i32 *%ptr + store volatile i32 %val1, i32 *%ptr + store volatile i32 %val2, i32 *%ptr + store volatile i32 %val3, i32 *%ptr + store volatile i32 %val4, i32 *%ptr + store volatile i32 %val5, i32 *%ptr + store volatile i32 %val6, i32 *%ptr + store volatile i32 %val7, i32 *%ptr + store volatile i32 %val8, i32 *%ptr + store volatile i32 %val9, i32 *%ptr + store volatile i32 %val10, i32 *%ptr + store volatile i32 %val11, i32 *%ptr + store volatile i32 %val12, i32 *%ptr + store volatile i32 %val13, i32 *%ptr + store volatile i32 %val14, i32 *%ptr + store volatile i32 %val15, i32 *%ptr + + store volatile i32 %ext0, i32 *%ptr + store volatile i32 %ext1, i32 *%ptr + store volatile i32 %ext2, i32 *%ptr + store volatile i32 %ext3, i32 *%ptr + store volatile i32 %ext4, i32 *%ptr + store volatile i32 %ext5, i32 *%ptr + store volatile i32 %ext6, i32 *%ptr + store volatile i32 %ext7, i32 *%ptr + store volatile i32 %ext8, i32 *%ptr + store volatile i32 %ext9, i32 *%ptr + store volatile i32 %ext10, i32 *%ptr + store volatile i32 %ext11, i32 *%ptr + store volatile i32 %ext12, i32 *%ptr + store volatile i32 %ext13, i32 *%ptr + store volatile i32 %ext14, i32 *%ptr + store volatile i32 %ext15, i32 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-conv-07.ll b/test/CodeGen/SystemZ/int-conv-07.ll index 041caa244c8e3..4b78c773d1ea8 100644 --- a/test/CodeGen/SystemZ/int-conv-07.ll +++ b/test/CodeGen/SystemZ/int-conv-07.ll @@ -4,9 +4,9 @@ ; Test register extension, starting with an i32. 
define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lghr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %half = trunc i64 %a to i16 %ext = sext i16 %half to i64 ret i64 %ext @@ -14,9 +14,9 @@ define i64 @f1(i64 %a) { ; ...and again with an i64. define i64 @f2(i32 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lghr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %half = trunc i32 %a to i16 %ext = sext i16 %half to i64 ret i64 %ext @@ -24,7 +24,7 @@ define i64 @f2(i32 %a) { ; Check LGH with no displacement. define i64 @f3(i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lgh %r2, 0(%r2) ; CHECK: br %r14 %half = load i16 *%src @@ -34,7 +34,7 @@ define i64 @f3(i16 *%src) { ; Check the high end of the LGH range. define i64 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lgh %r2, 524286(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 262143 @@ -46,7 +46,7 @@ define i64 @f4(i16 *%src) { ; Check the next halfword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r2, 524288 ; CHECK: lgh %r2, 0(%r2) ; CHECK: br %r14 @@ -58,7 +58,7 @@ define i64 @f5(i16 *%src) { ; Check the high end of the negative LGH range. define i64 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lgh %r2, -2(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -1 @@ -69,7 +69,7 @@ define i64 @f6(i16 *%src) { ; Check the low end of the LGH range. define i64 @f7(i16 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lgh %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -262144 @@ -81,7 +81,7 @@ define i64 @f7(i16 *%src) { ; Check the next halfword down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i64 @f8(i16 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, -524290 ; CHECK: lgh %r2, 0(%r2) ; CHECK: br %r14 @@ -93,7 +93,7 @@ define i64 @f8(i16 *%src) { ; Check that LGH allows an index. define i64 @f9(i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: lgh %r2, 524287(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -103,3 +103,97 @@ define i64 @f9(i64 %src, i64 %index) { %ext = sext i16 %half to i64 ret i64 %ext } + +; Test a case where we spill the source of at least one LGHR. We want +; to use LGH if possible. +define void @f10(i64 *%ptr) { +; CHECK-LABEL: f10: +; CHECK: lgh {{%r[0-9]+}}, 166(%r15) +; CHECK: br %r14 + %val0 = load volatile i64 *%ptr + %val1 = load volatile i64 *%ptr + %val2 = load volatile i64 *%ptr + %val3 = load volatile i64 *%ptr + %val4 = load volatile i64 *%ptr + %val5 = load volatile i64 *%ptr + %val6 = load volatile i64 *%ptr + %val7 = load volatile i64 *%ptr + %val8 = load volatile i64 *%ptr + %val9 = load volatile i64 *%ptr + %val10 = load volatile i64 *%ptr + %val11 = load volatile i64 *%ptr + %val12 = load volatile i64 *%ptr + %val13 = load volatile i64 *%ptr + %val14 = load volatile i64 *%ptr + %val15 = load volatile i64 *%ptr + + %trunc0 = trunc i64 %val0 to i16 + %trunc1 = trunc i64 %val1 to i16 + %trunc2 = trunc i64 %val2 to i16 + %trunc3 = trunc i64 %val3 to i16 + %trunc4 = trunc i64 %val4 to i16 + %trunc5 = trunc i64 %val5 to i16 + %trunc6 = trunc i64 %val6 to i16 + %trunc7 = trunc i64 %val7 to i16 + %trunc8 = trunc i64 %val8 to i16 + %trunc9 = trunc i64 %val9 to i16 + %trunc10 = trunc i64 %val10 to i16 + %trunc11 = trunc i64 %val11 to i16 + %trunc12 = trunc i64 %val12 to i16 + %trunc13 = trunc i64 %val13 to i16 + %trunc14 = trunc i64 %val14 to i16 + %trunc15 = trunc i64 %val15 to i16 + + %ext0 = sext i16 %trunc0 to i64 + %ext1 = sext i16 %trunc1 to i64 + %ext2 = sext i16 %trunc2 to i64 + %ext3 = sext i16 %trunc3 to i64 + %ext4 = sext i16 %trunc4 to i64 + %ext5 = sext i16 %trunc5 
to i64 + %ext6 = sext i16 %trunc6 to i64 + %ext7 = sext i16 %trunc7 to i64 + %ext8 = sext i16 %trunc8 to i64 + %ext9 = sext i16 %trunc9 to i64 + %ext10 = sext i16 %trunc10 to i64 + %ext11 = sext i16 %trunc11 to i64 + %ext12 = sext i16 %trunc12 to i64 + %ext13 = sext i16 %trunc13 to i64 + %ext14 = sext i16 %trunc14 to i64 + %ext15 = sext i16 %trunc15 to i64 + + store volatile i64 %val0, i64 *%ptr + store volatile i64 %val1, i64 *%ptr + store volatile i64 %val2, i64 *%ptr + store volatile i64 %val3, i64 *%ptr + store volatile i64 %val4, i64 *%ptr + store volatile i64 %val5, i64 *%ptr + store volatile i64 %val6, i64 *%ptr + store volatile i64 %val7, i64 *%ptr + store volatile i64 %val8, i64 *%ptr + store volatile i64 %val9, i64 *%ptr + store volatile i64 %val10, i64 *%ptr + store volatile i64 %val11, i64 *%ptr + store volatile i64 %val12, i64 *%ptr + store volatile i64 %val13, i64 *%ptr + store volatile i64 %val14, i64 *%ptr + store volatile i64 %val15, i64 *%ptr + + store volatile i64 %ext0, i64 *%ptr + store volatile i64 %ext1, i64 *%ptr + store volatile i64 %ext2, i64 *%ptr + store volatile i64 %ext3, i64 *%ptr + store volatile i64 %ext4, i64 *%ptr + store volatile i64 %ext5, i64 *%ptr + store volatile i64 %ext6, i64 *%ptr + store volatile i64 %ext7, i64 *%ptr + store volatile i64 %ext8, i64 *%ptr + store volatile i64 %ext9, i64 *%ptr + store volatile i64 %ext10, i64 *%ptr + store volatile i64 %ext11, i64 *%ptr + store volatile i64 %ext12, i64 *%ptr + store volatile i64 %ext13, i64 *%ptr + store volatile i64 %ext14, i64 *%ptr + store volatile i64 %ext15, i64 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-conv-08.ll b/test/CodeGen/SystemZ/int-conv-08.ll index 3d7f96675da99..6b6cb672fb9a1 100644 --- a/test/CodeGen/SystemZ/int-conv-08.ll +++ b/test/CodeGen/SystemZ/int-conv-08.ll @@ -4,9 +4,9 @@ ; Test register extension, starting with an i32. 
define i64 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: llghr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %half = trunc i32 %a to i16 %ext = zext i16 %half to i64 ret i64 %ext @@ -14,9 +14,9 @@ define i64 @f1(i32 %a) { ; ...and again with an i64. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: llghr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %half = trunc i64 %a to i16 %ext = zext i16 %half to i64 ret i64 %ext @@ -24,16 +24,16 @@ define i64 @f2(i64 %a) { ; Check ANDs that are equivalent to zero extension. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llghr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %ext = and i64 %a, 65535 ret i64 %ext } ; Check LLGH with no displacement. define i64 @f4(i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llgh %r2, 0(%r2) ; CHECK: br %r14 %half = load i16 *%src @@ -43,7 +43,7 @@ define i64 @f4(i16 *%src) { ; Check the high end of the LLGH range. define i64 @f5(i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: llgh %r2, 524286(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 262143 @@ -55,7 +55,7 @@ define i64 @f5(i16 *%src) { ; Check the next halfword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f6(i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, 524288 ; CHECK: llgh %r2, 0(%r2) ; CHECK: br %r14 @@ -67,7 +67,7 @@ define i64 @f6(i16 *%src) { ; Check the high end of the negative LLGH range. define i64 @f7(i16 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llgh %r2, -2(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -1 @@ -78,7 +78,7 @@ define i64 @f7(i16 *%src) { ; Check the low end of the LLGH range. define i64 @f8(i16 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: llgh %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -262144 @@ -90,7 +90,7 @@ define i64 @f8(i16 *%src) { ; Check the next halfword down, which needs separate address logic. 
; Other sequences besides this one would be OK. define i64 @f9(i16 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r2, -524290 ; CHECK: llgh %r2, 0(%r2) ; CHECK: br %r14 @@ -102,7 +102,7 @@ define i64 @f9(i16 *%src) { ; Check that LLGH allows an index define i64 @f10(i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: llgh %r2, 524287(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -112,3 +112,97 @@ define i64 @f10(i64 %src, i64 %index) { %ext = zext i16 %half to i64 ret i64 %ext } + +; Test a case where we spill the source of at least one LLGHR. We want +; to use LLGH if possible. +define void @f11(i64 *%ptr) { +; CHECK-LABEL: f11: +; CHECK: llgh {{%r[0-9]+}}, 166(%r15) +; CHECK: br %r14 + %val0 = load volatile i64 *%ptr + %val1 = load volatile i64 *%ptr + %val2 = load volatile i64 *%ptr + %val3 = load volatile i64 *%ptr + %val4 = load volatile i64 *%ptr + %val5 = load volatile i64 *%ptr + %val6 = load volatile i64 *%ptr + %val7 = load volatile i64 *%ptr + %val8 = load volatile i64 *%ptr + %val9 = load volatile i64 *%ptr + %val10 = load volatile i64 *%ptr + %val11 = load volatile i64 *%ptr + %val12 = load volatile i64 *%ptr + %val13 = load volatile i64 *%ptr + %val14 = load volatile i64 *%ptr + %val15 = load volatile i64 *%ptr + + %trunc0 = trunc i64 %val0 to i16 + %trunc1 = trunc i64 %val1 to i16 + %trunc2 = trunc i64 %val2 to i16 + %trunc3 = trunc i64 %val3 to i16 + %trunc4 = trunc i64 %val4 to i16 + %trunc5 = trunc i64 %val5 to i16 + %trunc6 = trunc i64 %val6 to i16 + %trunc7 = trunc i64 %val7 to i16 + %trunc8 = trunc i64 %val8 to i16 + %trunc9 = trunc i64 %val9 to i16 + %trunc10 = trunc i64 %val10 to i16 + %trunc11 = trunc i64 %val11 to i16 + %trunc12 = trunc i64 %val12 to i16 + %trunc13 = trunc i64 %val13 to i16 + %trunc14 = trunc i64 %val14 to i16 + %trunc15 = trunc i64 %val15 to i16 + + %ext0 = zext i16 %trunc0 to i64 + %ext1 = zext i16 %trunc1 to i64 + %ext2 = zext i16 %trunc2 to i64 + %ext3 = zext i16 %trunc3 to i64 + 
%ext4 = zext i16 %trunc4 to i64 + %ext5 = zext i16 %trunc5 to i64 + %ext6 = zext i16 %trunc6 to i64 + %ext7 = zext i16 %trunc7 to i64 + %ext8 = zext i16 %trunc8 to i64 + %ext9 = zext i16 %trunc9 to i64 + %ext10 = zext i16 %trunc10 to i64 + %ext11 = zext i16 %trunc11 to i64 + %ext12 = zext i16 %trunc12 to i64 + %ext13 = zext i16 %trunc13 to i64 + %ext14 = zext i16 %trunc14 to i64 + %ext15 = zext i16 %trunc15 to i64 + + store volatile i64 %val0, i64 *%ptr + store volatile i64 %val1, i64 *%ptr + store volatile i64 %val2, i64 *%ptr + store volatile i64 %val3, i64 *%ptr + store volatile i64 %val4, i64 *%ptr + store volatile i64 %val5, i64 *%ptr + store volatile i64 %val6, i64 *%ptr + store volatile i64 %val7, i64 *%ptr + store volatile i64 %val8, i64 *%ptr + store volatile i64 %val9, i64 *%ptr + store volatile i64 %val10, i64 *%ptr + store volatile i64 %val11, i64 *%ptr + store volatile i64 %val12, i64 *%ptr + store volatile i64 %val13, i64 *%ptr + store volatile i64 %val14, i64 *%ptr + store volatile i64 %val15, i64 *%ptr + + store volatile i64 %ext0, i64 *%ptr + store volatile i64 %ext1, i64 *%ptr + store volatile i64 %ext2, i64 *%ptr + store volatile i64 %ext3, i64 *%ptr + store volatile i64 %ext4, i64 *%ptr + store volatile i64 %ext5, i64 *%ptr + store volatile i64 %ext6, i64 *%ptr + store volatile i64 %ext7, i64 *%ptr + store volatile i64 %ext8, i64 *%ptr + store volatile i64 %ext9, i64 *%ptr + store volatile i64 %ext10, i64 *%ptr + store volatile i64 %ext11, i64 *%ptr + store volatile i64 %ext12, i64 *%ptr + store volatile i64 %ext13, i64 *%ptr + store volatile i64 %ext14, i64 *%ptr + store volatile i64 %ext15, i64 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-conv-09.ll b/test/CodeGen/SystemZ/int-conv-09.ll index 6e93886895d55..b9c508917d4db 100644 --- a/test/CodeGen/SystemZ/int-conv-09.ll +++ b/test/CodeGen/SystemZ/int-conv-09.ll @@ -4,18 +4,18 @@ ; Test register extension, starting with an i32. 
define i64 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lgfr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %ext = sext i32 %a to i64 ret i64 %ext } ; ...and again with an i64. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lgfr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %word = trunc i64 %a to i32 %ext = sext i32 %word to i64 ret i64 %ext @@ -23,7 +23,7 @@ define i64 @f2(i64 %a) { ; Check LGF with no displacement. define i64 @f3(i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lgf %r2, 0(%r2) ; CHECK: br %r14 %word = load i32 *%src @@ -33,7 +33,7 @@ define i64 @f3(i32 *%src) { ; Check the high end of the LGF range. define i64 @f4(i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lgf %r2, 524284(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -45,7 +45,7 @@ define i64 @f4(i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f5(i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r2, 524288 ; CHECK: lgf %r2, 0(%r2) ; CHECK: br %r14 @@ -57,7 +57,7 @@ define i64 @f5(i32 *%src) { ; Check the high end of the negative LGF range. define i64 @f6(i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lgf %r2, -4(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -68,7 +68,7 @@ define i64 @f6(i32 *%src) { ; Check the low end of the LGF range. define i64 @f7(i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lgf %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -80,7 +80,7 @@ define i64 @f7(i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f8(i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, -524292 ; CHECK: lgf %r2, 0(%r2) ; CHECK: br %r14 @@ -92,7 +92,7 @@ define i64 @f8(i32 *%src) { ; Check that LGF allows an index. 
define i64 @f9(i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: lgf %r2, 524287(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/int-conv-10.ll b/test/CodeGen/SystemZ/int-conv-10.ll index 918bc1de8fa5a..781c74c7fa234 100644 --- a/test/CodeGen/SystemZ/int-conv-10.ll +++ b/test/CodeGen/SystemZ/int-conv-10.ll @@ -4,18 +4,18 @@ ; Test register extension, starting with an i32. define i64 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: llgfr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %ext = zext i32 %a to i64 ret i64 %ext } ; ...and again with an i64. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: llgfr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %word = trunc i64 %a to i32 %ext = zext i32 %word to i64 ret i64 %ext @@ -23,16 +23,16 @@ define i64 @f2(i64 %a) { ; Check ANDs that are equivalent to zero extension. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: llgfr %r2, %r2 -; CHECk: br %r14 +; CHECK: br %r14 %ext = and i64 %a, 4294967295 ret i64 %ext } ; Check LLGF with no displacement. define i64 @f4(i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llgf %r2, 0(%r2) ; CHECK: br %r14 %word = load i32 *%src @@ -42,7 +42,7 @@ define i64 @f4(i32 *%src) { ; Check the high end of the LLGF range. define i64 @f5(i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: llgf %r2, 524284(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -54,7 +54,7 @@ define i64 @f5(i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f6(i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, 524288 ; CHECK: llgf %r2, 0(%r2) ; CHECK: br %r14 @@ -66,7 +66,7 @@ define i64 @f6(i32 *%src) { ; Check the high end of the negative LLGF range. 
define i64 @f7(i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: llgf %r2, -4(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -77,7 +77,7 @@ define i64 @f7(i32 *%src) { ; Check the low end of the LLGF range. define i64 @f8(i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: llgf %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -89,7 +89,7 @@ define i64 @f8(i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f9(i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r2, -524292 ; CHECK: llgf %r2, 0(%r2) ; CHECK: br %r14 @@ -101,7 +101,7 @@ define i64 @f9(i32 *%src) { ; Check that LLGF allows an index. define i64 @f10(i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: llgf %r2, 524287(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/int-conv-11.ll b/test/CodeGen/SystemZ/int-conv-11.ll new file mode 100644 index 0000000000000..30769621bf82d --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-11.ll @@ -0,0 +1,350 @@ +; Test spills of zero extensions when high GR32s are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test a case where we spill the source of at least one LLCRMux. We want +; to use LLC(H) if possible. 
+define void @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: llc{{h?}} {{%r[0-9]+}}, 16{{[37]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + %val16 = load volatile i32 *%ptr + %val17 = load volatile i32 *%ptr + %val18 = load volatile i32 *%ptr + %val19 = load volatile i32 *%ptr + %val20 = load volatile i32 *%ptr + %val21 = load volatile i32 *%ptr + %val22 = load volatile i32 *%ptr + %val23 = load volatile i32 *%ptr + %val24 = load volatile i32 *%ptr + %val25 = load volatile i32 *%ptr + %val26 = load volatile i32 *%ptr + %val27 = load volatile i32 *%ptr + %val28 = load volatile i32 *%ptr + %val29 = load volatile i32 *%ptr + %val30 = load volatile i32 *%ptr + %val31 = load volatile i32 *%ptr + + %trunc0 = trunc i32 %val0 to i8 + %trunc1 = trunc i32 %val1 to i8 + %trunc2 = trunc i32 %val2 to i8 + %trunc3 = trunc i32 %val3 to i8 + %trunc4 = trunc i32 %val4 to i8 + %trunc5 = trunc i32 %val5 to i8 + %trunc6 = trunc i32 %val6 to i8 + %trunc7 = trunc i32 %val7 to i8 + %trunc8 = trunc i32 %val8 to i8 + %trunc9 = trunc i32 %val9 to i8 + %trunc10 = trunc i32 %val10 to i8 + %trunc11 = trunc i32 %val11 to i8 + %trunc12 = trunc i32 %val12 to i8 + %trunc13 = trunc i32 %val13 to i8 + %trunc14 = trunc i32 %val14 to i8 + %trunc15 = trunc i32 %val15 to i8 + %trunc16 = trunc i32 %val16 to i8 + %trunc17 = trunc i32 %val17 to i8 + %trunc18 = trunc i32 %val18 to i8 + %trunc19 = trunc i32 %val19 to i8 + %trunc20 = trunc i32 %val20 to i8 + %trunc21 = trunc i32 %val21 to 
i8 + %trunc22 = trunc i32 %val22 to i8 + %trunc23 = trunc i32 %val23 to i8 + %trunc24 = trunc i32 %val24 to i8 + %trunc25 = trunc i32 %val25 to i8 + %trunc26 = trunc i32 %val26 to i8 + %trunc27 = trunc i32 %val27 to i8 + %trunc28 = trunc i32 %val28 to i8 + %trunc29 = trunc i32 %val29 to i8 + %trunc30 = trunc i32 %val30 to i8 + %trunc31 = trunc i32 %val31 to i8 + + %ext0 = zext i8 %trunc0 to i32 + %ext1 = zext i8 %trunc1 to i32 + %ext2 = zext i8 %trunc2 to i32 + %ext3 = zext i8 %trunc3 to i32 + %ext4 = zext i8 %trunc4 to i32 + %ext5 = zext i8 %trunc5 to i32 + %ext6 = zext i8 %trunc6 to i32 + %ext7 = zext i8 %trunc7 to i32 + %ext8 = zext i8 %trunc8 to i32 + %ext9 = zext i8 %trunc9 to i32 + %ext10 = zext i8 %trunc10 to i32 + %ext11 = zext i8 %trunc11 to i32 + %ext12 = zext i8 %trunc12 to i32 + %ext13 = zext i8 %trunc13 to i32 + %ext14 = zext i8 %trunc14 to i32 + %ext15 = zext i8 %trunc15 to i32 + %ext16 = zext i8 %trunc16 to i32 + %ext17 = zext i8 %trunc17 to i32 + %ext18 = zext i8 %trunc18 to i32 + %ext19 = zext i8 %trunc19 to i32 + %ext20 = zext i8 %trunc20 to i32 + %ext21 = zext i8 %trunc21 to i32 + %ext22 = zext i8 %trunc22 to i32 + %ext23 = zext i8 %trunc23 to i32 + %ext24 = zext i8 %trunc24 to i32 + %ext25 = zext i8 %trunc25 to i32 + %ext26 = zext i8 %trunc26 to i32 + %ext27 = zext i8 %trunc27 to i32 + %ext28 = zext i8 %trunc28 to i32 + %ext29 = zext i8 %trunc29 to i32 + %ext30 = zext i8 %trunc30 to i32 + %ext31 = zext i8 %trunc31 to i32 + + store volatile i32 %val0, i32 *%ptr + store volatile i32 %val1, i32 *%ptr + store volatile i32 %val2, i32 *%ptr + store volatile i32 %val3, i32 *%ptr + store volatile i32 %val4, i32 *%ptr + store volatile i32 %val5, i32 *%ptr + store volatile i32 %val6, i32 *%ptr + store volatile i32 %val7, i32 *%ptr + store volatile i32 %val8, i32 *%ptr + store volatile i32 %val9, i32 *%ptr + store volatile i32 %val10, i32 *%ptr + store volatile i32 %val11, i32 *%ptr + store volatile i32 %val12, i32 *%ptr + store volatile i32 %val13, i32 
*%ptr + store volatile i32 %val14, i32 *%ptr + store volatile i32 %val15, i32 *%ptr + store volatile i32 %val16, i32 *%ptr + store volatile i32 %val17, i32 *%ptr + store volatile i32 %val18, i32 *%ptr + store volatile i32 %val19, i32 *%ptr + store volatile i32 %val20, i32 *%ptr + store volatile i32 %val21, i32 *%ptr + store volatile i32 %val22, i32 *%ptr + store volatile i32 %val23, i32 *%ptr + store volatile i32 %val24, i32 *%ptr + store volatile i32 %val25, i32 *%ptr + store volatile i32 %val26, i32 *%ptr + store volatile i32 %val27, i32 *%ptr + store volatile i32 %val28, i32 *%ptr + store volatile i32 %val29, i32 *%ptr + store volatile i32 %val30, i32 *%ptr + store volatile i32 %val31, i32 *%ptr + + store volatile i32 %ext0, i32 *%ptr + store volatile i32 %ext1, i32 *%ptr + store volatile i32 %ext2, i32 *%ptr + store volatile i32 %ext3, i32 *%ptr + store volatile i32 %ext4, i32 *%ptr + store volatile i32 %ext5, i32 *%ptr + store volatile i32 %ext6, i32 *%ptr + store volatile i32 %ext7, i32 *%ptr + store volatile i32 %ext8, i32 *%ptr + store volatile i32 %ext9, i32 *%ptr + store volatile i32 %ext10, i32 *%ptr + store volatile i32 %ext11, i32 *%ptr + store volatile i32 %ext12, i32 *%ptr + store volatile i32 %ext13, i32 *%ptr + store volatile i32 %ext14, i32 *%ptr + store volatile i32 %ext15, i32 *%ptr + store volatile i32 %ext16, i32 *%ptr + store volatile i32 %ext17, i32 *%ptr + store volatile i32 %ext18, i32 *%ptr + store volatile i32 %ext19, i32 *%ptr + store volatile i32 %ext20, i32 *%ptr + store volatile i32 %ext21, i32 *%ptr + store volatile i32 %ext22, i32 *%ptr + store volatile i32 %ext23, i32 *%ptr + store volatile i32 %ext24, i32 *%ptr + store volatile i32 %ext25, i32 *%ptr + store volatile i32 %ext26, i32 *%ptr + store volatile i32 %ext27, i32 *%ptr + store volatile i32 %ext28, i32 *%ptr + store volatile i32 %ext29, i32 *%ptr + store volatile i32 %ext30, i32 *%ptr + store volatile i32 %ext31, i32 *%ptr + + ret void +} + +; Same again with i16, which 
should use LLH(H). +define void @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: llh{{h?}} {{%r[0-9]+}}, 16{{[26]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + %val16 = load volatile i32 *%ptr + %val17 = load volatile i32 *%ptr + %val18 = load volatile i32 *%ptr + %val19 = load volatile i32 *%ptr + %val20 = load volatile i32 *%ptr + %val21 = load volatile i32 *%ptr + %val22 = load volatile i32 *%ptr + %val23 = load volatile i32 *%ptr + %val24 = load volatile i32 *%ptr + %val25 = load volatile i32 *%ptr + %val26 = load volatile i32 *%ptr + %val27 = load volatile i32 *%ptr + %val28 = load volatile i32 *%ptr + %val29 = load volatile i32 *%ptr + %val30 = load volatile i32 *%ptr + %val31 = load volatile i32 *%ptr + + %trunc0 = trunc i32 %val0 to i16 + %trunc1 = trunc i32 %val1 to i16 + %trunc2 = trunc i32 %val2 to i16 + %trunc3 = trunc i32 %val3 to i16 + %trunc4 = trunc i32 %val4 to i16 + %trunc5 = trunc i32 %val5 to i16 + %trunc6 = trunc i32 %val6 to i16 + %trunc7 = trunc i32 %val7 to i16 + %trunc8 = trunc i32 %val8 to i16 + %trunc9 = trunc i32 %val9 to i16 + %trunc10 = trunc i32 %val10 to i16 + %trunc11 = trunc i32 %val11 to i16 + %trunc12 = trunc i32 %val12 to i16 + %trunc13 = trunc i32 %val13 to i16 + %trunc14 = trunc i32 %val14 to i16 + %trunc15 = trunc i32 %val15 to i16 + %trunc16 = trunc i32 %val16 to i16 + %trunc17 = trunc i32 %val17 to i16 + %trunc18 = trunc i32 %val18 to i16 + %trunc19 = trunc i32 %val19 to i16 + %trunc20 = trunc i32 %val20 
to i16 + %trunc21 = trunc i32 %val21 to i16 + %trunc22 = trunc i32 %val22 to i16 + %trunc23 = trunc i32 %val23 to i16 + %trunc24 = trunc i32 %val24 to i16 + %trunc25 = trunc i32 %val25 to i16 + %trunc26 = trunc i32 %val26 to i16 + %trunc27 = trunc i32 %val27 to i16 + %trunc28 = trunc i32 %val28 to i16 + %trunc29 = trunc i32 %val29 to i16 + %trunc30 = trunc i32 %val30 to i16 + %trunc31 = trunc i32 %val31 to i16 + + %ext0 = zext i16 %trunc0 to i32 + %ext1 = zext i16 %trunc1 to i32 + %ext2 = zext i16 %trunc2 to i32 + %ext3 = zext i16 %trunc3 to i32 + %ext4 = zext i16 %trunc4 to i32 + %ext5 = zext i16 %trunc5 to i32 + %ext6 = zext i16 %trunc6 to i32 + %ext7 = zext i16 %trunc7 to i32 + %ext8 = zext i16 %trunc8 to i32 + %ext9 = zext i16 %trunc9 to i32 + %ext10 = zext i16 %trunc10 to i32 + %ext11 = zext i16 %trunc11 to i32 + %ext12 = zext i16 %trunc12 to i32 + %ext13 = zext i16 %trunc13 to i32 + %ext14 = zext i16 %trunc14 to i32 + %ext15 = zext i16 %trunc15 to i32 + %ext16 = zext i16 %trunc16 to i32 + %ext17 = zext i16 %trunc17 to i32 + %ext18 = zext i16 %trunc18 to i32 + %ext19 = zext i16 %trunc19 to i32 + %ext20 = zext i16 %trunc20 to i32 + %ext21 = zext i16 %trunc21 to i32 + %ext22 = zext i16 %trunc22 to i32 + %ext23 = zext i16 %trunc23 to i32 + %ext24 = zext i16 %trunc24 to i32 + %ext25 = zext i16 %trunc25 to i32 + %ext26 = zext i16 %trunc26 to i32 + %ext27 = zext i16 %trunc27 to i32 + %ext28 = zext i16 %trunc28 to i32 + %ext29 = zext i16 %trunc29 to i32 + %ext30 = zext i16 %trunc30 to i32 + %ext31 = zext i16 %trunc31 to i32 + + store volatile i32 %val0, i32 *%ptr + store volatile i32 %val1, i32 *%ptr + store volatile i32 %val2, i32 *%ptr + store volatile i32 %val3, i32 *%ptr + store volatile i32 %val4, i32 *%ptr + store volatile i32 %val5, i32 *%ptr + store volatile i32 %val6, i32 *%ptr + store volatile i32 %val7, i32 *%ptr + store volatile i32 %val8, i32 *%ptr + store volatile i32 %val9, i32 *%ptr + store volatile i32 %val10, i32 *%ptr + store volatile i32 %val11, 
i32 *%ptr + store volatile i32 %val12, i32 *%ptr + store volatile i32 %val13, i32 *%ptr + store volatile i32 %val14, i32 *%ptr + store volatile i32 %val15, i32 *%ptr + store volatile i32 %val16, i32 *%ptr + store volatile i32 %val17, i32 *%ptr + store volatile i32 %val18, i32 *%ptr + store volatile i32 %val19, i32 *%ptr + store volatile i32 %val20, i32 *%ptr + store volatile i32 %val21, i32 *%ptr + store volatile i32 %val22, i32 *%ptr + store volatile i32 %val23, i32 *%ptr + store volatile i32 %val24, i32 *%ptr + store volatile i32 %val25, i32 *%ptr + store volatile i32 %val26, i32 *%ptr + store volatile i32 %val27, i32 *%ptr + store volatile i32 %val28, i32 *%ptr + store volatile i32 %val29, i32 *%ptr + store volatile i32 %val30, i32 *%ptr + store volatile i32 %val31, i32 *%ptr + + store volatile i32 %ext0, i32 *%ptr + store volatile i32 %ext1, i32 *%ptr + store volatile i32 %ext2, i32 *%ptr + store volatile i32 %ext3, i32 *%ptr + store volatile i32 %ext4, i32 *%ptr + store volatile i32 %ext5, i32 *%ptr + store volatile i32 %ext6, i32 *%ptr + store volatile i32 %ext7, i32 *%ptr + store volatile i32 %ext8, i32 *%ptr + store volatile i32 %ext9, i32 *%ptr + store volatile i32 %ext10, i32 *%ptr + store volatile i32 %ext11, i32 *%ptr + store volatile i32 %ext12, i32 *%ptr + store volatile i32 %ext13, i32 *%ptr + store volatile i32 %ext14, i32 *%ptr + store volatile i32 %ext15, i32 *%ptr + store volatile i32 %ext16, i32 *%ptr + store volatile i32 %ext17, i32 *%ptr + store volatile i32 %ext18, i32 *%ptr + store volatile i32 %ext19, i32 *%ptr + store volatile i32 %ext20, i32 *%ptr + store volatile i32 %ext21, i32 *%ptr + store volatile i32 %ext22, i32 *%ptr + store volatile i32 %ext23, i32 *%ptr + store volatile i32 %ext24, i32 *%ptr + store volatile i32 %ext25, i32 *%ptr + store volatile i32 %ext26, i32 *%ptr + store volatile i32 %ext27, i32 *%ptr + store volatile i32 %ext28, i32 *%ptr + store volatile i32 %ext29, i32 *%ptr + store volatile i32 %ext30, i32 *%ptr + store 
volatile i32 %ext31, i32 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-div-01.ll b/test/CodeGen/SystemZ/int-div-01.ll index 492ece91497e3..2c21186e33693 100644 --- a/test/CodeGen/SystemZ/int-div-01.ll +++ b/test/CodeGen/SystemZ/int-div-01.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i32 @foo() + ; Test register division. The result is in the second of the two registers. define void @f1(i32 *%dest, i32 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lgfr %r1, %r3 ; CHECK: dsgfr %r0, %r4 ; CHECK: st %r1, 0(%r2) @@ -16,7 +18,7 @@ define void @f1(i32 *%dest, i32 %a, i32 %b) { ; Test register remainder. The result is in the first of the two registers. define void @f2(i32 *%dest, i32 %a, i32 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lgfr %r1, %r3 ; CHECK: dsgfr %r0, %r4 ; CHECK: st %r0, 0(%r2) @@ -28,7 +30,7 @@ define void @f2(i32 *%dest, i32 %a, i32 %b) { ; Test that division and remainder use a single instruction. define i32 @f3(i32 %dummy, i32 %a, i32 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: %r2 ; CHECK: lgfr %r3, %r3 ; CHECK-NOT: %r2 @@ -45,7 +47,7 @@ define i32 @f3(i32 %dummy, i32 %a, i32 %b) { ; Check that the sign extension of the dividend is elided when the argument ; is already sign-extended. define i32 @f4(i32 %dummy, i32 signext %a, i32 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgfr %r2, %r4 ; CHECK-NOT: dsgfr @@ -59,7 +61,7 @@ define i32 @f4(i32 %dummy, i32 signext %a, i32 %b) { ; Test that memory dividends are loaded using sign extension (LGF). define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: %r2 ; CHECK: lgf %r3, 0(%r3) ; CHECK-NOT: %r2 @@ -76,7 +78,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { ; Test memory division with no displacement. 
define void @f6(i32 *%dest, i32 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lgfr %r1, %r3 ; CHECK: dsgf %r0, 0(%r4) ; CHECK: st %r1, 0(%r2) @@ -89,7 +91,7 @@ define void @f6(i32 *%dest, i32 %a, i32 *%src) { ; Test memory remainder with no displacement. define void @f7(i32 *%dest, i32 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lgfr %r1, %r3 ; CHECK: dsgf %r0, 0(%r4) ; CHECK: st %r0, 0(%r2) @@ -102,7 +104,7 @@ define void @f7(i32 *%dest, i32 %a, i32 *%src) { ; Test both memory division and memory remainder. define i32 @f8(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK-NOT: %r2 ; CHECK: lgfr %r3, %r3 ; CHECK-NOT: %r2 @@ -119,7 +121,7 @@ define i32 @f8(i32 %dummy, i32 %a, i32 *%src) { ; Check the high end of the DSGF range. define i32 @f9(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: dsgf %r2, 524284(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -131,7 +133,7 @@ define i32 @f9(i32 %dummy, i32 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f10(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r4, 524288 ; CHECK: dsgf %r2, 0(%r4) ; CHECK: br %r14 @@ -143,7 +145,7 @@ define i32 @f10(i32 %dummy, i32 %a, i32 *%src) { ; Check the high end of the negative aligned DSGF range. define i32 @f11(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: dsgf %r2, -4(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -154,7 +156,7 @@ define i32 @f11(i32 %dummy, i32 %a, i32 *%src) { ; Check the low end of the DSGF range. 
define i32 @f12(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: dsgf %r2, -524288(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -166,7 +168,7 @@ define i32 @f12(i32 %dummy, i32 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f13(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: agfi %r4, -524292 ; CHECK: dsgf %r2, 0(%r4) ; CHECK: br %r14 @@ -178,7 +180,7 @@ define i32 @f13(i32 %dummy, i32 %a, i32 *%src) { ; Check that DSGF allows an index. define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: dsgf %r2, 524287(%r5,%r4) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -188,3 +190,62 @@ define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) { %rem = srem i32 %a, %b ret i32 %rem } + +; Make sure that we still use DSGFR rather than DSGR in cases where +; a load and division cannot be combined. +define void @f15(i32 *%dest, i32 *%src) { +; CHECK-LABEL: f15: +; CHECK: l [[B:%r[0-9]+]], 0(%r3) +; CHECK: brasl %r14, foo@PLT +; CHECK: lgfr %r1, %r2 +; CHECK: dsgfr %r0, [[B]] +; CHECK: br %r14 + %b = load i32 *%src + %a = call i32 @foo() + %div = sdiv i32 %a, %b + store i32 %div, i32 *%dest + ret void +} + +; Check that divisions of spilled values can use DSGF rather than DSGFR. 
+define i32 @f16(i32 *%ptr0) { +; CHECK-LABEL: f16: +; CHECK: brasl %r14, foo@PLT +; CHECK: dsgf {{%r[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %ret = call i32 @foo() + + %div0 = sdiv i32 %ret, %val0 + %div1 = sdiv i32 %div0, %val1 + %div2 = sdiv i32 %div1, %val2 + %div3 = sdiv i32 %div2, %val3 + %div4 = sdiv i32 %div3, %val4 + %div5 = sdiv i32 %div4, %val5 + %div6 = sdiv i32 %div5, %val6 + %div7 = sdiv i32 %div6, %val7 + %div8 = sdiv i32 %div7, %val8 + %div9 = sdiv i32 %div8, %val9 + + ret i32 %div9 +} diff --git a/test/CodeGen/SystemZ/int-div-02.ll b/test/CodeGen/SystemZ/int-div-02.ll index 7954384d2962b..f3287a56c6cda 100644 --- a/test/CodeGen/SystemZ/int-div-02.ll +++ b/test/CodeGen/SystemZ/int-div-02.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i32 @foo() + ; Test register division. The result is in the second of the two registers. define void @f1(i32 %dummy, i32 %a, i32 %b, i32 *%dest) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: %r3 ; CHECK: {{llill|lhi}} %r2, 0 ; CHECK-NOT: %r3 @@ -18,7 +20,7 @@ define void @f1(i32 %dummy, i32 %a, i32 %b, i32 *%dest) { ; Test register remainder. The result is in the first of the two registers. 
define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%dest) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: %r3 ; CHECK: {{llill|lhi}} %r2, 0 ; CHECK-NOT: %r3 @@ -32,7 +34,7 @@ define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%dest) { ; Test that division and remainder use a single instruction. define i32 @f3(i32 %dummy1, i32 %a, i32 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: %r3 ; CHECK: {{llill|lhi}} %r2, 0 ; CHECK-NOT: %r3 @@ -48,7 +50,7 @@ define i32 @f3(i32 %dummy1, i32 %a, i32 %b) { ; Test memory division with no displacement. define void @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: %r3 ; CHECK: {{llill|lhi}} %r2, 0 ; CHECK-NOT: %r3 @@ -63,7 +65,7 @@ define void @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) { ; Test memory remainder with no displacement. define void @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: %r3 ; CHECK: {{llill|lhi}} %r2, 0 ; CHECK-NOT: %r3 @@ -78,7 +80,7 @@ define void @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) { ; Test both memory division and memory remainder. define i32 @f6(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: %r3 ; CHECK: {{llill|lhi}} %r2, 0 ; CHECK-NOT: %r3 @@ -95,7 +97,7 @@ define i32 @f6(i32 %dummy, i32 %a, i32 *%src) { ; Check the high end of the DL range. define i32 @f7(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: dl %r2, 524284(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -107,7 +109,7 @@ define i32 @f7(i32 %dummy, i32 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f8(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r4, 524288 ; CHECK: dl %r2, 0(%r4) ; CHECK: br %r14 @@ -119,7 +121,7 @@ define i32 @f8(i32 %dummy, i32 %a, i32 *%src) { ; Check the high end of the negative aligned DL range. 
define i32 @f9(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: dl %r2, -4(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -130,7 +132,7 @@ define i32 @f9(i32 %dummy, i32 %a, i32 *%src) { ; Check the low end of the DL range. define i32 @f10(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: dl %r2, -524288(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -142,7 +144,7 @@ define i32 @f10(i32 %dummy, i32 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f11(i32 %dummy, i32 %a, i32 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r4, -524292 ; CHECK: dl %r2, 0(%r4) ; CHECK: br %r14 @@ -154,7 +156,7 @@ define i32 @f11(i32 %dummy, i32 %a, i32 *%src) { ; Check that DL allows an index. define i32 @f12(i32 %dummy, i32 %a, i64 %src, i64 %index) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: dl %r2, 524287(%r5,%r4) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -164,3 +166,46 @@ define i32 @f12(i32 %dummy, i32 %a, i64 %src, i64 %index) { %rem = urem i32 %a, %b ret i32 %rem } + +; Check that divisions of spilled values can use DL rather than DLR. 
+define i32 @f13(i32 *%ptr0) { +; CHECK-LABEL: f13: +; CHECK: brasl %r14, foo@PLT +; CHECK: dl {{%r[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %ret = call i32 @foo() + + %div0 = udiv i32 %ret, %val0 + %div1 = udiv i32 %div0, %val1 + %div2 = udiv i32 %div1, %val2 + %div3 = udiv i32 %div2, %val3 + %div4 = udiv i32 %div3, %val4 + %div5 = udiv i32 %div4, %val5 + %div6 = udiv i32 %div5, %val6 + %div7 = udiv i32 %div6, %val7 + %div8 = udiv i32 %div7, %val8 + %div9 = udiv i32 %div8, %val9 + + ret i32 %div9 +} diff --git a/test/CodeGen/SystemZ/int-div-03.ll b/test/CodeGen/SystemZ/int-div-03.ll index b950f2b02035d..7c0409018f161 100644 --- a/test/CodeGen/SystemZ/int-div-03.ll +++ b/test/CodeGen/SystemZ/int-div-03.ll @@ -3,9 +3,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Test register division. The result is in the second of the two registers. define void @f1(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgfr %r2, %r4 ; CHECK: stg %r3, 0(%r5) @@ -18,7 +20,7 @@ define void @f1(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { ; Test register remainder. The result is in the first of the two registers. 
define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgfr %r2, %r4 ; CHECK: stg %r2, 0(%r5) @@ -31,7 +33,7 @@ define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { ; Test that division and remainder use a single instruction. define i64 @f3(i64 %dummy, i64 %a, i32 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgfr %r2, %r4 ; CHECK: ogr %r2, %r3 @@ -46,7 +48,7 @@ define i64 @f3(i64 %dummy, i64 %a, i32 %b) { ; Test register division when the dividend is zero rather than sign extended. ; We can't use dsgfr here define void @f4(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: dsgfr ; CHECK: br %r14 %bext = zext i32 %b to i64 @@ -57,7 +59,7 @@ define void @f4(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { ; ...likewise remainder. define void @f5(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: dsgfr ; CHECK: br %r14 %bext = zext i32 %b to i64 @@ -68,7 +70,7 @@ define void @f5(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { ; Test memory division with no displacement. define void @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgf %r2, 0(%r4) ; CHECK: stg %r3, 0(%r5) @@ -82,7 +84,7 @@ define void @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) { ; Test memory remainder with no displacement. define void @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgf %r2, 0(%r4) ; CHECK: stg %r2, 0(%r5) @@ -96,7 +98,7 @@ define void @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) { ; Test both memory division and memory remainder. 
define i64 @f8(i64 %dummy, i64 %a, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgf %r2, 0(%r4) ; CHECK-NOT: {{dsgf|dsgfr}} @@ -112,7 +114,7 @@ define i64 @f8(i64 %dummy, i64 %a, i32 *%src) { ; Check the high end of the DSGF range. define i64 @f9(i64 %dummy, i64 %a, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: dsgf %r2, 524284(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -125,7 +127,7 @@ define i64 @f9(i64 %dummy, i64 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f10(i64 %dummy, i64 %a, i32 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r4, 524288 ; CHECK: dsgf %r2, 0(%r4) ; CHECK: br %r14 @@ -138,7 +140,7 @@ define i64 @f10(i64 %dummy, i64 %a, i32 *%src) { ; Check the high end of the negative aligned DSGF range. define i64 @f11(i64 %dummy, i64 %a, i32 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: dsgf %r2, -4(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -150,7 +152,7 @@ define i64 @f11(i64 %dummy, i64 %a, i32 *%src) { ; Check the low end of the DSGF range. define i64 @f12(i64 %dummy, i64 %a, i32 *%src) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: dsgf %r2, -524288(%r4) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -163,7 +165,7 @@ define i64 @f12(i64 %dummy, i64 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f13(i64 %dummy, i64 %a, i32 *%src) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: agfi %r4, -524292 ; CHECK: dsgf %r2, 0(%r4) ; CHECK: br %r14 @@ -176,7 +178,7 @@ define i64 @f13(i64 %dummy, i64 %a, i32 *%src) { ; Check that DSGF allows an index. 
define i64 @f14(i64 %dummy, i64 %a, i64 %src, i64 %index) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: dsgf %r2, 524287(%r5,%r4) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -187,3 +189,20 @@ define i64 @f14(i64 %dummy, i64 %a, i64 %src, i64 %index) { %rem = srem i64 %a, %bext ret i64 %rem } + +; Make sure that we still use DSGFR rather than DSGR in cases where +; a load and division cannot be combined. +define void @f15(i64 *%dest, i32 *%src) { +; CHECK-LABEL: f15: +; CHECK: l [[B:%r[0-9]+]], 0(%r3) +; CHECK: brasl %r14, foo@PLT +; CHECK: lgr %r1, %r2 +; CHECK: dsgfr %r0, [[B]] +; CHECK: br %r14 + %b = load i32 *%src + %a = call i64 @foo() + %ext = sext i32 %b to i64 + %div = sdiv i64 %a, %ext + store i64 %div, i64 *%dest + ret void +} diff --git a/test/CodeGen/SystemZ/int-div-04.ll b/test/CodeGen/SystemZ/int-div-04.ll index 3f72be9a47da6..87f1e105f6a4f 100644 --- a/test/CodeGen/SystemZ/int-div-04.ll +++ b/test/CodeGen/SystemZ/int-div-04.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Testg register division. The result is in the second of the two registers. define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgr %r2, %r4 ; CHECK: stg %r3, 0(%r5) @@ -16,7 +18,7 @@ define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { ; Testg register remainder. The result is in the first of the two registers. define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgr %r2, %r4 ; CHECK: stg %r2, 0(%r5) @@ -28,7 +30,7 @@ define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { ; Testg that division and remainder use a single instruction. 
define i64 @f3(i64 %dummy1, i64 %a, i64 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsgr %r2, %r4 ; CHECK-NOT: dsgr @@ -42,7 +44,7 @@ define i64 @f3(i64 %dummy1, i64 %a, i64 %b) { ; Testg memory division with no displacement. define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsg %r2, 0(%r4) ; CHECK: stg %r3, 0(%r5) @@ -55,7 +57,7 @@ define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { ; Testg memory remainder with no displacement. define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsg %r2, 0(%r4) ; CHECK: stg %r2, 0(%r5) @@ -68,7 +70,7 @@ define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { ; Testg both memory division and memory remainder. define i64 @f6(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: {{%r[234]}} ; CHECK: dsg %r2, 0(%r4) ; CHECK-NOT: {{dsg|dsgr}} @@ -83,7 +85,7 @@ define i64 @f6(i64 %dummy, i64 %a, i64 *%src) { ; Check the high end of the DSG range. define i64 @f7(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: dsg %r2, 524280(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -95,7 +97,7 @@ define i64 @f7(i64 %dummy, i64 %a, i64 *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f8(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r4, 524288 ; CHECK: dsg %r2, 0(%r4) ; CHECK: br %r14 @@ -107,7 +109,7 @@ define i64 @f8(i64 %dummy, i64 %a, i64 *%src) { ; Check the high end of the negative aligned DSG range. 
define i64 @f9(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: dsg %r2, -8(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -118,7 +120,7 @@ define i64 @f9(i64 %dummy, i64 %a, i64 *%src) { ; Check the low end of the DSG range. define i64 @f10(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: dsg %r2, -524288(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -130,7 +132,7 @@ define i64 @f10(i64 %dummy, i64 %a, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f11(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r4, -524296 ; CHECK: dsg %r2, 0(%r4) ; CHECK: br %r14 @@ -142,7 +144,7 @@ define i64 @f11(i64 %dummy, i64 %a, i64 *%src) { ; Check that DSG allows an index. define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: dsg %r2, 524287(%r5,%r4) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -152,3 +154,49 @@ define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) { %rem = srem i64 %a, %b ret i64 %rem } + +; Check that divisions of spilled values can use DSG rather than DSGR. 
+define i64 @f13(i64 *%ptr0) { +; CHECK-LABEL: f13: +; CHECK: brasl %r14, foo@PLT +; CHECK: dsg {{%r[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64 *%ptr0, i64 18 + %ptr10 = getelementptr i64 *%ptr0, i64 20 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + %val9 = load i64 *%ptr9 + %val10 = load i64 *%ptr10 + + %ret = call i64 @foo() + + %div0 = sdiv i64 %ret, %val0 + %div1 = sdiv i64 %div0, %val1 + %div2 = sdiv i64 %div1, %val2 + %div3 = sdiv i64 %div2, %val3 + %div4 = sdiv i64 %div3, %val4 + %div5 = sdiv i64 %div4, %val5 + %div6 = sdiv i64 %div5, %val6 + %div7 = sdiv i64 %div6, %val7 + %div8 = sdiv i64 %div7, %val8 + %div9 = sdiv i64 %div8, %val9 + %div10 = sdiv i64 %div9, %val10 + + ret i64 %div10 +} diff --git a/test/CodeGen/SystemZ/int-div-05.ll b/test/CodeGen/SystemZ/int-div-05.ll index 04f622b44e744..817983005a9db 100644 --- a/test/CodeGen/SystemZ/int-div-05.ll +++ b/test/CodeGen/SystemZ/int-div-05.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Testg register division. The result is in the second of the two registers. define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: %r3 ; CHECK: {{llill|lghi}} %r2, 0 ; CHECK-NOT: %r3 @@ -18,7 +20,7 @@ define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { ; Testg register remainder. The result is in the first of the two registers. 
define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK-NOT: %r3 ; CHECK: {{llill|lghi}} %r2, 0 ; CHECK-NOT: %r3 @@ -32,7 +34,7 @@ define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { ; Testg that division and remainder use a single instruction. define i64 @f3(i64 %dummy1, i64 %a, i64 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: %r3 ; CHECK: {{llill|lghi}} %r2, 0 ; CHECK-NOT: %r3 @@ -48,7 +50,7 @@ define i64 @f3(i64 %dummy1, i64 %a, i64 %b) { ; Testg memory division with no displacement. define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: %r3 ; CHECK: {{llill|lghi}} %r2, 0 ; CHECK-NOT: %r3 @@ -63,7 +65,7 @@ define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { ; Testg memory remainder with no displacement. define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK-NOT: %r3 ; CHECK: {{llill|lghi}} %r2, 0 ; CHECK-NOT: %r3 @@ -78,7 +80,7 @@ define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { ; Testg both memory division and memory remainder. define i64 @f6(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: %r3 ; CHECK: {{llill|lghi}} %r2, 0 ; CHECK-NOT: %r3 @@ -95,7 +97,7 @@ define i64 @f6(i64 %dummy, i64 %a, i64 *%src) { ; Check the high end of the DLG range. define i64 @f7(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: dlg %r2, 524280(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -107,7 +109,7 @@ define i64 @f7(i64 %dummy, i64 %a, i64 *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i64 @f8(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r4, 524288 ; CHECK: dlg %r2, 0(%r4) ; CHECK: br %r14 @@ -119,7 +121,7 @@ define i64 @f8(i64 %dummy, i64 %a, i64 *%src) { ; Check the high end of the negative aligned DLG range. define i64 @f9(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: dlg %r2, -8(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -130,7 +132,7 @@ define i64 @f9(i64 %dummy, i64 %a, i64 *%src) { ; Check the low end of the DLG range. define i64 @f10(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: dlg %r2, -524288(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -142,7 +144,7 @@ define i64 @f10(i64 %dummy, i64 %a, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f11(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r4, -524296 ; CHECK: dlg %r2, 0(%r4) ; CHECK: br %r14 @@ -154,7 +156,7 @@ define i64 @f11(i64 %dummy, i64 %a, i64 *%src) { ; Check that DLG allows an index. define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: dlg %r2, 524287(%r5,%r4) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -164,3 +166,49 @@ define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) { %rem = urem i64 %a, %b ret i64 %rem } + +; Check that divisions of spilled values can use DLG rather than DLGR. 
+define i64 @f13(i64 *%ptr0) { +; CHECK-LABEL: f13: +; CHECK: brasl %r14, foo@PLT +; CHECK: dlg {{%r[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64 *%ptr0, i64 18 + %ptr10 = getelementptr i64 *%ptr0, i64 20 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + %val9 = load i64 *%ptr9 + %val10 = load i64 *%ptr10 + + %ret = call i64 @foo() + + %div0 = udiv i64 %ret, %val0 + %div1 = udiv i64 %div0, %val1 + %div2 = udiv i64 %div1, %val2 + %div3 = udiv i64 %div2, %val3 + %div4 = udiv i64 %div3, %val4 + %div5 = udiv i64 %div4, %val5 + %div6 = udiv i64 %div5, %val6 + %div7 = udiv i64 %div6, %val7 + %div8 = udiv i64 %div7, %val8 + %div9 = udiv i64 %div8, %val9 + %div10 = udiv i64 %div9, %val10 + + ret i64 %div10 +} diff --git a/test/CodeGen/SystemZ/int-div-06.ll b/test/CodeGen/SystemZ/int-div-06.ll new file mode 100644 index 0000000000000..8576b1b6270a3 --- /dev/null +++ b/test/CodeGen/SystemZ/int-div-06.ll @@ -0,0 +1,56 @@ +; Test that divisions by constants are implemented as multiplications. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check signed 32-bit division. +define i32 @f1(i32 %a) { +; CHECK-LABEL: f1: +; CHECK: lgfr [[REG:%r[0-5]]], %r2 +; CHECK: msgfi [[REG]], 502748801 +; CHECK-DAG: srlg [[RES1:%r[0-5]]], [[REG]], 63 +; CHECK-DAG: srag %r2, [[REG]], 46 +; CHECK: ar %r2, [[RES1]] +; CHECK: br %r14 + %b = sdiv i32 %a, 139968 + ret i32 %b +} + +; Check unsigned 32-bit division. 
+define i32 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: llgfr [[REG:%r[0-5]]], %r2 +; CHECK: msgfi [[REG]], 502748801 +; CHECK: srlg %r2, [[REG]], 46 +; CHECK: br %r14 + %b = udiv i32 %a, 139968 + ret i32 %b +} + +; Check signed 64-bit division. +define i64 @f3(i64 %dummy, i64 %a) { +; CHECK-LABEL: f3: +; CHECK-DAG: llihf [[CONST:%r[0-5]]], 1005497601 +; CHECK-DAG: oilf [[CONST]], 4251762321 +; CHECK-DAG: srag [[REG:%r[0-5]]], %r3, 63 +; CHECK-DAG: ngr [[REG]], [[CONST]] +; CHECK-DAG: mlgr %r2, [[CONST]] +; CHECK: sgr %r2, [[REG]] +; CHECK: srlg [[RES1:%r[0-5]]], %r2, 63 +; CHECK: srag %r2, %r2, 15 +; CHECK: agr %r2, [[RES1]] +; CHECK: br %r14 + %b = sdiv i64 %a, 139968 + ret i64 %b +} + +; Check unsigned 64-bit division. +define i64 @f4(i64 %dummy, i64 %a) { +; CHECK-LABEL: f4: +; CHECK: llihf [[CONST:%r[0-5]]], 1005497601 +; CHECK: oilf [[CONST]], 4251762321 +; CHECK: mlgr %r2, [[CONST]] +; CHECK: srlg %r2, %r2, 15 +; CHECK: br %r14 + %b = udiv i64 %a, 139968 + ret i64 %b +} diff --git a/test/CodeGen/SystemZ/int-move-01.ll b/test/CodeGen/SystemZ/int-move-01.ll index ae890ade3275f..038e6887d67ce 100644 --- a/test/CodeGen/SystemZ/int-move-01.ll +++ b/test/CodeGen/SystemZ/int-move-01.ll @@ -4,7 +4,7 @@ ; Test 8-bit moves, which should get promoted to i32. define i8 @f1(i8 %a, i8 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lr %r2, %r3 ; CHECK: br %r14 ret i8 %b @@ -12,7 +12,7 @@ define i8 @f1(i8 %a, i8 %b) { ; Test 16-bit moves, which again should get promoted to i32. define i16 @f2(i16 %a, i16 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lr %r2, %r3 ; CHECK: br %r14 ret i16 %b @@ -20,7 +20,7 @@ define i16 @f2(i16 %a, i16 %b) { ; Test 32-bit moves. define i32 @f3(i32 %a, i32 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lr %r2, %r3 ; CHECK: br %r14 ret i32 %b @@ -28,7 +28,7 @@ define i32 @f3(i32 %a, i32 %b) { ; Test 64-bit moves. 
define i64 @f4(i64 %a, i64 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lgr %r2, %r3 ; CHECK: br %r14 ret i64 %b diff --git a/test/CodeGen/SystemZ/int-move-02.ll b/test/CodeGen/SystemZ/int-move-02.ll index 467e22d89c5a3..5fc0843290f98 100644 --- a/test/CodeGen/SystemZ/int-move-02.ll +++ b/test/CodeGen/SystemZ/int-move-02.ll @@ -4,7 +4,7 @@ ; Check the low end of the L range. define i32 @f1(i32 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r2) ; CHECK: br %r14 %val = load i32 *%src @@ -13,7 +13,7 @@ define i32 @f1(i32 *%src) { ; Check the high end of the aligned L range. define i32 @f2(i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: l %r2, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1023 @@ -23,7 +23,7 @@ define i32 @f2(i32 *%src) { ; Check the next word up, which should use LY instead of L. define i32 @f3(i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ly %r2, 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1024 @@ -33,7 +33,7 @@ define i32 @f3(i32 *%src) { ; Check the high end of the aligned LY range. define i32 @f4(i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: ly %r2, 524284(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -44,7 +44,7 @@ define i32 @f4(i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f5(i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r2, 524288 ; CHECK: l %r2, 0(%r2) ; CHECK: br %r14 @@ -55,7 +55,7 @@ define i32 @f5(i32 *%src) { ; Check the high end of the negative aligned LY range. define i32 @f6(i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: ly %r2, -4(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -65,7 +65,7 @@ define i32 @f6(i32 *%src) { ; Check the low end of the LY range. 
define i32 @f7(i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: ly %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -76,7 +76,7 @@ define i32 @f7(i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f8(i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, -524292 ; CHECK: l %r2, 0(%r2) ; CHECK: br %r14 @@ -87,7 +87,7 @@ define i32 @f8(i32 *%src) { ; Check that L allows an index. define i32 @f9(i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: l %r2, 4095({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -99,7 +99,7 @@ define i32 @f9(i64 %src, i64 %index) { ; Check that LY allows an index. define i32 @f10(i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ly %r2, 4096({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/int-move-03.ll b/test/CodeGen/SystemZ/int-move-03.ll index 97c70a2740c10..2894512e8eead 100644 --- a/test/CodeGen/SystemZ/int-move-03.ll +++ b/test/CodeGen/SystemZ/int-move-03.ll @@ -4,7 +4,7 @@ ; Check LG with no displacement. define i64 @f1(i64 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r2) ; CHECK: br %r14 %val = load i64 *%src @@ -13,7 +13,7 @@ define i64 @f1(i64 *%src) { ; Check the high end of the aligned LG range. define i64 @f2(i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lg %r2, 524280(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -24,7 +24,7 @@ define i64 @f2(i64 *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f3(i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: agfi %r2, 524288 ; CHECK: lg %r2, 0(%r2) ; CHECK: br %r14 @@ -35,7 +35,7 @@ define i64 @f3(i64 *%src) { ; Check the high end of the negative aligned LG range. 
define i64 @f4(i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lg %r2, -8(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -45,7 +45,7 @@ define i64 @f4(i64 *%src) { ; Check the low end of the LG range. define i64 @f5(i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lg %r2, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -56,7 +56,7 @@ define i64 @f5(i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f6(i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, -524296 ; CHECK: lg %r2, 0(%r2) ; CHECK: br %r14 @@ -67,7 +67,7 @@ define i64 @f6(i64 *%src) { ; Check that LG allows an index. define i64 @f7(i64 %src, i64 %index) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lg %r2, 524287({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/int-move-04.ll b/test/CodeGen/SystemZ/int-move-04.ll index 9736657b1efad..d97ed2f54a4bb 100644 --- a/test/CodeGen/SystemZ/int-move-04.ll +++ b/test/CodeGen/SystemZ/int-move-04.ll @@ -4,7 +4,7 @@ ; Test an i8 store, which should get converted into an i32 truncation. define void @f1(i8 *%dst, i8 %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stc %r3, 0(%r2) ; CHECK: br %r14 store i8 %val, i8 *%dst @@ -13,7 +13,7 @@ define void @f1(i8 *%dst, i8 %val) { ; Test an i32 truncating store. define void @f2(i8 *%dst, i32 %val) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: stc %r3, 0(%r2) ; CHECK: br %r14 %trunc = trunc i32 %val to i8 @@ -23,7 +23,7 @@ define void @f2(i8 *%dst, i32 %val) { ; Test an i64 truncating store. define void @f3(i8 *%dst, i64 %val) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: stc %r3, 0(%r2) ; CHECK: br %r14 %trunc = trunc i64 %val to i8 @@ -33,7 +33,7 @@ define void @f3(i8 *%dst, i64 %val) { ; Check the high end of the STC range. 
define void @f4(i8 *%dst, i8 %val) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: stc %r3, 4095(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%dst, i64 4095 @@ -43,7 +43,7 @@ define void @f4(i8 *%dst, i8 %val) { ; Check the next byte up, which should use STCY instead of STC. define void @f5(i8 *%dst, i8 %val) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: stcy %r3, 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%dst, i64 4096 @@ -53,7 +53,7 @@ define void @f5(i8 *%dst, i8 %val) { ; Check the high end of the STCY range. define void @f6(i8 *%dst, i8 %val) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: stcy %r3, 524287(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%dst, i64 524287 @@ -64,7 +64,7 @@ define void @f6(i8 *%dst, i8 %val) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f7(i8 *%dst, i8 %val) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r2, 524288 ; CHECK: stc %r3, 0(%r2) ; CHECK: br %r14 @@ -75,7 +75,7 @@ define void @f7(i8 *%dst, i8 %val) { ; Check the high end of the negative STCY range. define void @f8(i8 *%dst, i8 %val) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: stcy %r3, -1(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%dst, i64 -1 @@ -85,7 +85,7 @@ define void @f8(i8 *%dst, i8 %val) { ; Check the low end of the STCY range. define void @f9(i8 *%dst, i8 %val) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: stcy %r3, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i8 *%dst, i64 -524288 @@ -96,7 +96,7 @@ define void @f9(i8 *%dst, i8 %val) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f10(i8 *%dst, i8 %val) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r2, -524289 ; CHECK: stc %r3, 0(%r2) ; CHECK: br %r14 @@ -107,7 +107,7 @@ define void @f10(i8 *%dst, i8 %val) { ; Check that STC allows an index. 
define void @f11(i64 %dst, i64 %index, i8 %val) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: stc %r4, 4095(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %dst, %index @@ -119,7 +119,7 @@ define void @f11(i64 %dst, i64 %index, i8 %val) { ; Check that STCY allows an index. define void @f12(i64 %dst, i64 %index, i8 %val) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: stcy %r4, 4096(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %dst, %index diff --git a/test/CodeGen/SystemZ/int-move-05.ll b/test/CodeGen/SystemZ/int-move-05.ll index f61477e71830f..c21b88aa7baac 100644 --- a/test/CodeGen/SystemZ/int-move-05.ll +++ b/test/CodeGen/SystemZ/int-move-05.ll @@ -4,7 +4,7 @@ ; Test an i16 store, which should get converted into an i32 truncation. define void @f1(i16 *%dst, i16 %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sth %r3, 0(%r2) ; CHECK: br %r14 store i16 %val, i16 *%dst @@ -13,7 +13,7 @@ define void @f1(i16 *%dst, i16 %val) { ; Test an i32 truncating store. define void @f2(i16 *%dst, i32 %val) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sth %r3, 0(%r2) ; CHECK: br %r14 %trunc = trunc i32 %val to i16 @@ -23,7 +23,7 @@ define void @f2(i16 *%dst, i32 %val) { ; Test an i64 truncating store. define void @f3(i16 *%dst, i64 %val) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: sth %r3, 0(%r2) ; CHECK: br %r14 %trunc = trunc i64 %val to i16 @@ -33,7 +33,7 @@ define void @f3(i16 *%dst, i64 %val) { ; Check the high end of the STH range. define void @f4(i16 *%dst, i16 %val) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: sth %r3, 4094(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%dst, i64 2047 @@ -43,7 +43,7 @@ define void @f4(i16 *%dst, i16 %val) { ; Check the next halfword up, which should use STHY instead of STH. define void @f5(i16 *%dst, i16 %val) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sthy %r3, 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%dst, i64 2048 @@ -53,7 +53,7 @@ define void @f5(i16 *%dst, i16 %val) { ; Check the high end of the aligned STHY range. 
define void @f6(i16 *%dst, i16 %val) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sthy %r3, 524286(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%dst, i64 262143 @@ -64,7 +64,7 @@ define void @f6(i16 *%dst, i16 %val) { ; Check the next halfword up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f7(i16 *%dst, i16 %val) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r2, 524288 ; CHECK: sth %r3, 0(%r2) ; CHECK: br %r14 @@ -75,7 +75,7 @@ define void @f7(i16 *%dst, i16 %val) { ; Check the high end of the negative aligned STHY range. define void @f8(i16 *%dst, i16 %val) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sthy %r3, -2(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%dst, i64 -1 @@ -85,7 +85,7 @@ define void @f8(i16 *%dst, i16 %val) { ; Check the low end of the STHY range. define void @f9(i16 *%dst, i16 %val) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: sthy %r3, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%dst, i64 -262144 @@ -96,7 +96,7 @@ define void @f9(i16 *%dst, i16 %val) { ; Check the next halfword down, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f10(i16 *%dst, i16 %val) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: agfi %r2, -524290 ; CHECK: sth %r3, 0(%r2) ; CHECK: br %r14 @@ -107,7 +107,7 @@ define void @f10(i16 *%dst, i16 %val) { ; Check that STH allows an index. define void @f11(i64 %dst, i64 %index, i16 %val) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: sth %r4, 4094({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %dst, %index @@ -119,7 +119,7 @@ define void @f11(i64 %dst, i64 %index, i16 %val) { ; Check that STHY allows an index. 
define void @f12(i64 %dst, i64 %index, i16 %val) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: sthy %r4, 4096({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %dst, %index diff --git a/test/CodeGen/SystemZ/int-move-06.ll b/test/CodeGen/SystemZ/int-move-06.ll index 5b35a32ff543a..b8c6f53e15d85 100644 --- a/test/CodeGen/SystemZ/int-move-06.ll +++ b/test/CodeGen/SystemZ/int-move-06.ll @@ -4,7 +4,7 @@ ; Test an i32 store. define void @f1(i32 *%dst, i32 %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: st %r3, 0(%r2) ; CHECK: br %r14 store i32 %val, i32 *%dst @@ -20,7 +20,7 @@ define void @f2(i32 *%dst, i64 %val) { ; Check the high end of the aligned ST range. define void @f3(i32 *%dst, i32 %val) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: st %r3, 4092(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%dst, i64 1023 @@ -30,7 +30,7 @@ define void @f3(i32 *%dst, i32 %val) { ; Check the next word up, which should use STY instead of ST. define void @f4(i32 *%dst, i32 %val) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: sty %r3, 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%dst, i64 1024 @@ -40,7 +40,7 @@ define void @f4(i32 *%dst, i32 %val) { ; Check the high end of the aligned STY range. define void @f5(i32 *%dst, i32 %val) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sty %r3, 524284(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%dst, i64 131071 @@ -51,7 +51,7 @@ define void @f5(i32 *%dst, i32 %val) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f6(i32 *%dst, i32 %val) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, 524288 ; CHECK: st %r3, 0(%r2) ; CHECK: br %r14 @@ -62,7 +62,7 @@ define void @f6(i32 *%dst, i32 %val) { ; Check the high end of the negative aligned STY range. 
define void @f7(i32 *%dst, i32 %val) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sty %r3, -4(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%dst, i64 -1 @@ -72,7 +72,7 @@ define void @f7(i32 *%dst, i32 %val) { ; Check the low end of the STY range. define void @f8(i32 *%dst, i32 %val) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sty %r3, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%dst, i64 -131072 @@ -83,7 +83,7 @@ define void @f8(i32 *%dst, i32 %val) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f9(i32 *%dst, i32 %val) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r2, -524292 ; CHECK: st %r3, 0(%r2) ; CHECK: br %r14 @@ -94,7 +94,7 @@ define void @f9(i32 *%dst, i32 %val) { ; Check that ST allows an index. define void @f10(i64 %dst, i64 %index, i32 %val) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: st %r4, 4095(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %dst, %index @@ -106,7 +106,7 @@ define void @f10(i64 %dst, i64 %index, i32 %val) { ; Check that STY allows an index. define void @f11(i64 %dst, i64 %index, i32 %val) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: sty %r4, 4096(%r3,%r2) ; CHECK: br %r14 %add1 = add i64 %dst, %index diff --git a/test/CodeGen/SystemZ/int-move-07.ll b/test/CodeGen/SystemZ/int-move-07.ll index ab21ab0395343..5cac1e5b1a2ec 100644 --- a/test/CodeGen/SystemZ/int-move-07.ll +++ b/test/CodeGen/SystemZ/int-move-07.ll @@ -4,7 +4,7 @@ ; Check STG with no displacement. define void @f1(i64 *%dst, i64 %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: stg %r3, 0(%r2) ; CHECK: br %r14 store i64 %val, i64 *%dst @@ -13,7 +13,7 @@ define void @f1(i64 *%dst, i64 %val) { ; Check the high end of the aligned STG range. 
define void @f2(i64 *%dst, i64 %val) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: stg %r3, 524280(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%dst, i64 65535 @@ -24,7 +24,7 @@ define void @f2(i64 *%dst, i64 %val) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f3(i64 *%dst, i64 %val) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: agfi %r2, 524288 ; CHECK: stg %r3, 0(%r2) ; CHECK: br %r14 @@ -35,7 +35,7 @@ define void @f3(i64 *%dst, i64 %val) { ; Check the high end of the negative aligned STG range. define void @f4(i64 *%dst, i64 %val) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: stg %r3, -8(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%dst, i64 -1 @@ -45,7 +45,7 @@ define void @f4(i64 *%dst, i64 %val) { ; Check the low end of the STG range. define void @f5(i64 *%dst, i64 %val) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: stg %r3, -524288(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%dst, i64 -65536 @@ -56,7 +56,7 @@ define void @f5(i64 *%dst, i64 %val) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f6(i64 *%dst, i64 %val) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r2, -524296 ; CHECK: stg %r3, 0(%r2) ; CHECK: br %r14 @@ -67,7 +67,7 @@ define void @f6(i64 *%dst, i64 %val) { ; Check that STG allows an index. 
define void @f7(i64 %dst, i64 %index, i64 %val) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: stg %r4, 524287({{%r3,%r2|%r2,%r3}}) ; CHECK: br %r14 %add1 = add i64 %dst, %index diff --git a/test/CodeGen/SystemZ/int-move-08.ll b/test/CodeGen/SystemZ/int-move-08.ll index 5640fec3299fa..56fcbc6d802b9 100644 --- a/test/CodeGen/SystemZ/int-move-08.ll +++ b/test/CodeGen/SystemZ/int-move-08.ll @@ -6,10 +6,16 @@ @gsrc32 = global i32 1 @gdst16 = global i16 2 @gdst32 = global i32 2 +@gsrc16u = global i16 1, align 1, section "foo" +@gsrc32u = global i32 1, align 2, section "foo" +@gdst16u = global i16 2, align 1, section "foo" +@gdst32u = global i32 2, align 2, section "foo" +@garray8 = global [2 x i8] [i8 100, i8 101] +@garray16 = global [2 x i16] [i16 102, i16 103] ; Check sign-extending loads from i16. define i32 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lhrl %r2, gsrc16 ; CHECK: br %r14 %val = load i16 *@gsrc16 @@ -19,7 +25,7 @@ define i32 @f1() { ; Check zero-extending loads from i16. define i32 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: llhrl %r2, gsrc16 ; CHECK: br %r14 %val = load i16 *@gsrc16 @@ -29,7 +35,7 @@ define i32 @f2() { ; Check truncating 16-bit stores. define void @f3(i32 %val) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: sthrl %r2, gdst16 ; CHECK: br %r14 %half = trunc i32 %val to i16 @@ -39,7 +45,7 @@ define void @f3(i32 %val) { ; Check plain loads and stores. define void @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lrl %r0, gsrc32 ; CHECK: strl %r0, gdst32 ; CHECK: br %r14 @@ -47,3 +53,82 @@ define void @f4() { store i32 %val, i32 *@gdst32 ret void } + +; Repeat f1 with an unaligned variable. +define i32 @f5() { +; CHECK-LABEL: f5: +; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u +; CHECK: lh %r2, 0([[REG]]) +; CHECK: br %r14 + %val = load i16 *@gsrc16u, align 1 + %ext = sext i16 %val to i32 + ret i32 %ext +} + +; Repeat f2 with an unaligned variable. 
+define i32 @f6() { +; CHECK-LABEL: f6: +; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u +; CHECK: llh %r2, 0([[REG]]) +; CHECK: br %r14 + %val = load i16 *@gsrc16u, align 1 + %ext = zext i16 %val to i32 + ret i32 %ext +} + +; Repeat f3 with an unaligned variable. +define void @f7(i32 %val) { +; CHECK-LABEL: f7: +; CHECK: lgrl [[REG:%r[0-5]]], gdst16u +; CHECK: sth %r2, 0([[REG]]) +; CHECK: br %r14 + %half = trunc i32 %val to i16 + store i16 %half, i16 *@gdst16u, align 1 + ret void +} + +; Repeat f4 with unaligned variables. +define void @f8() { +; CHECK-LABEL: f8: +; CHECK: larl [[REG:%r[0-5]]], gsrc32u +; CHECK: l [[VAL:%r[0-5]]], 0([[REG]]) +; CHECK: larl [[REG:%r[0-5]]], gdst32u +; CHECK: st [[VAL]], 0([[REG]]) +; CHECK: br %r14 + %val = load i32 *@gsrc32u, align 2 + store i32 %val, i32 *@gdst32u, align 2 + ret void +} + +; Test a case where we want to use one LARL for accesses to two different +; parts of a variable. +define void @f9() { +; CHECK-LABEL: f9: +; CHECK: larl [[REG:%r[0-5]]], garray8 +; CHECK: llc [[VAL:%r[0-5]]], 0([[REG]]) +; CHECK: srl [[VAL]], 1 +; CHECK: stc [[VAL]], 1([[REG]]) +; CHECK: br %r14 + %ptr1 = getelementptr [2 x i8] *@garray8, i64 0, i64 0 + %ptr2 = getelementptr [2 x i8] *@garray8, i64 0, i64 1 + %val = load i8 *%ptr1 + %shr = lshr i8 %val, 1 + store i8 %shr, i8 *%ptr2 + ret void +} + +; Test a case where we want to use separate relative-long addresses for +; two different parts of a variable. 
+define void @f10() { +; CHECK-LABEL: f10: +; CHECK: llhrl [[VAL:%r[0-5]]], garray16 +; CHECK: srl [[VAL]], 1 +; CHECK: sthrl [[VAL]], garray16+2 +; CHECK: br %r14 + %ptr1 = getelementptr [2 x i16] *@garray16, i64 0, i64 0 + %ptr2 = getelementptr [2 x i16] *@garray16, i64 0, i64 1 + %val = load i16 *%ptr1 + %shr = lshr i16 %val, 1 + store i16 %shr, i16 *%ptr2 + ret void +} diff --git a/test/CodeGen/SystemZ/int-move-09.ll b/test/CodeGen/SystemZ/int-move-09.ll index a7a8c82951f5e..b5c9cb13d288a 100644 --- a/test/CodeGen/SystemZ/int-move-09.ll +++ b/test/CodeGen/SystemZ/int-move-09.ll @@ -8,10 +8,16 @@ @gdst16 = global i16 2 @gdst32 = global i32 2 @gdst64 = global i64 2 +@gsrc16u = global i16 1, align 1, section "foo" +@gsrc32u = global i32 1, align 2, section "foo" +@gsrc64u = global i64 1, align 4, section "foo" +@gdst16u = global i16 2, align 1, section "foo" +@gdst32u = global i32 2, align 2, section "foo" +@gdst64u = global i64 2, align 4, section "foo" ; Check sign-extending loads from i16. define i64 @f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lghrl %r2, gsrc16 ; CHECK: br %r14 %val = load i16 *@gsrc16 @@ -21,7 +27,7 @@ define i64 @f1() { ; Check zero-extending loads from i16. define i64 @f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: llghrl %r2, gsrc16 ; CHECK: br %r14 %val = load i16 *@gsrc16 @@ -31,7 +37,7 @@ define i64 @f2() { ; Check sign-extending loads from i32. define i64 @f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lgfrl %r2, gsrc32 ; CHECK: br %r14 %val = load i32 *@gsrc32 @@ -41,7 +47,7 @@ define i64 @f3() { ; Check zero-extending loads from i32. define i64 @f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: llgfrl %r2, gsrc32 ; CHECK: br %r14 %val = load i32 *@gsrc32 @@ -51,7 +57,7 @@ define i64 @f4() { ; Check truncating 16-bit stores. 
define void @f5(i64 %val) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sthrl %r2, gdst16 ; CHECK: br %r14 %half = trunc i64 %val to i16 @@ -61,7 +67,7 @@ define void @f5(i64 %val) { ; Check truncating 32-bit stores. define void @f6(i64 %val) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: strl %r2, gdst32 ; CHECK: br %r14 %word = trunc i64 %val to i32 @@ -71,7 +77,7 @@ define void @f6(i64 %val) { ; Check plain loads and stores. define void @f7() { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lgrl %r0, gsrc64 ; CHECK: stgrl %r0, gdst64 ; CHECK: br %r14 @@ -79,3 +85,82 @@ define void @f7() { store i64 %val, i64 *@gdst64 ret void } + +; Repeat f1 with an unaligned variable. +define i64 @f8() { +; CHECK-LABEL: f8: +; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT +; CHECK: lgh %r2, 0([[REG]]) +; CHECK: br %r14 + %val = load i16 *@gsrc16u, align 1 + %ext = sext i16 %val to i64 + ret i64 %ext +} + +; Repeat f2 with an unaligned variable. +define i64 @f9() { +; CHECK-LABEL: f9: +; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT +; CHECK: llgh %r2, 0([[REG]]) +; CHECK: br %r14 + %val = load i16 *@gsrc16u, align 1 + %ext = zext i16 %val to i64 + ret i64 %ext +} + +; Repeat f3 with an unaligned variable. +define i64 @f10() { +; CHECK-LABEL: f10: +; CHECK: larl [[REG:%r[0-5]]], gsrc32u +; CHECK: lgf %r2, 0([[REG]]) +; CHECK: br %r14 + %val = load i32 *@gsrc32u, align 2 + %ext = sext i32 %val to i64 + ret i64 %ext +} + +; Repeat f4 with an unaligned variable. +define i64 @f11() { +; CHECK-LABEL: f11: +; CHECK: larl [[REG:%r[0-5]]], gsrc32u +; CHECK: llgf %r2, 0([[REG]]) +; CHECK: br %r14 + %val = load i32 *@gsrc32u, align 2 + %ext = zext i32 %val to i64 + ret i64 %ext +} + +; Repeat f5 with an unaligned variable. +define void @f12(i64 %val) { +; CHECK-LABEL: f12: +; CHECK: lgrl [[REG:%r[0-5]]], gdst16u@GOT +; CHECK: sth %r2, 0([[REG]]) +; CHECK: br %r14 + %half = trunc i64 %val to i16 + store i16 %half, i16 *@gdst16u, align 1 + ret void +} + +; Repeat f6 with an unaligned variable. 
+define void @f13(i64 %val) { +; CHECK-LABEL: f13: +; CHECK: larl [[REG:%r[0-5]]], gdst32u +; CHECK: st %r2, 0([[REG]]) +; CHECK: br %r14 + %word = trunc i64 %val to i32 + store i32 %word, i32 *@gdst32u, align 2 + ret void +} + +; Repeat f7 with unaligned variables. +define void @f14() { +; CHECK-LABEL: f14: +; CHECK: larl [[REG:%r[0-5]]], gsrc64u +; CHECK: lg [[VAL:%r[0-5]]], 0([[REG]]) +; CHECK: larl [[REG:%r[0-5]]], gdst64u +; CHECK: stg [[VAL]], 0([[REG]]) +; CHECK: br %r14 + %val = load i64 *@gsrc64u, align 4 + store i64 %val, i64 *@gdst64u, align 4 + ret void +} diff --git a/test/CodeGen/SystemZ/int-mul-01.ll b/test/CodeGen/SystemZ/int-mul-01.ll index e1246e2156e3e..d5f7155f8c48a 100644 --- a/test/CodeGen/SystemZ/int-mul-01.ll +++ b/test/CodeGen/SystemZ/int-mul-01.ll @@ -5,7 +5,7 @@ ; Check the low end of the MH range. define i32 @f1(i32 %lhs, i16 *%src) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: mh %r2, 0(%r3) ; CHECK: br %r14 %half = load i16 *%src @@ -16,7 +16,7 @@ define i32 @f1(i32 %lhs, i16 *%src) { ; Check the high end of the aligned MH range. define i32 @f2(i32 %lhs, i16 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mh %r2, 4094(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 2047 @@ -28,7 +28,7 @@ define i32 @f2(i32 %lhs, i16 *%src) { ; Check the next halfword up, which should use MHY instead of MH. define i32 @f3(i32 %lhs, i16 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: mhy %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 2048 @@ -40,7 +40,7 @@ define i32 @f3(i32 %lhs, i16 *%src) { ; Check the high end of the aligned MHY range. define i32 @f4(i32 %lhs, i16 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: mhy %r2, 524286(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 262143 @@ -53,7 +53,7 @@ define i32 @f4(i32 %lhs, i16 *%src) { ; Check the next halfword up, which needs separate address logic. ; Other sequences besides this one would be OK. 
define i32 @f5(i32 %lhs, i16 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: agfi %r3, 524288 ; CHECK: mh %r2, 0(%r3) ; CHECK: br %r14 @@ -66,7 +66,7 @@ define i32 @f5(i32 %lhs, i16 *%src) { ; Check the high end of the negative aligned MHY range. define i32 @f6(i32 %lhs, i16 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: mhy %r2, -2(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -1 @@ -78,7 +78,7 @@ define i32 @f6(i32 %lhs, i16 *%src) { ; Check the low end of the MHY range. define i32 @f7(i32 %lhs, i16 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: mhy %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i16 *%src, i64 -262144 @@ -91,7 +91,7 @@ define i32 @f7(i32 %lhs, i16 *%src) { ; Check the next halfword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f8(i32 %lhs, i16 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r3, -524290 ; CHECK: mh %r2, 0(%r3) ; CHECK: br %r14 @@ -104,7 +104,7 @@ define i32 @f8(i32 %lhs, i16 *%src) { ; Check that MH allows an index. define i32 @f9(i32 %lhs, i64 %src, i64 %index) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: mh %r2, 4094({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -118,7 +118,7 @@ define i32 @f9(i32 %lhs, i64 %src, i64 %index) { ; Check that MHY allows an index. define i32 @f10(i32 %lhs, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: mhy %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index diff --git a/test/CodeGen/SystemZ/int-mul-02.ll b/test/CodeGen/SystemZ/int-mul-02.ll index d39c4dd0961cb..d002a7f2f9bd2 100644 --- a/test/CodeGen/SystemZ/int-mul-02.ll +++ b/test/CodeGen/SystemZ/int-mul-02.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i32 @foo() + ; Check MSR. 
define i32 @f1(i32 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: msr %r2, %r3 ; CHECK: br %r14 %mul = mul i32 %a, %b @@ -13,7 +15,7 @@ define i32 @f1(i32 %a, i32 %b) { ; Check the low end of the MS range. define i32 @f2(i32 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: ms %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -23,7 +25,7 @@ define i32 @f2(i32 %a, i32 *%src) { ; Check the high end of the aligned MS range. define i32 @f3(i32 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: ms %r2, 4092(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1023 @@ -34,7 +36,7 @@ define i32 @f3(i32 %a, i32 *%src) { ; Check the next word up, which should use MSY instead of MS. define i32 @f4(i32 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: msy %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1024 @@ -45,7 +47,7 @@ define i32 @f4(i32 %a, i32 *%src) { ; Check the high end of the aligned MSY range. define i32 @f5(i32 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: msy %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -57,7 +59,7 @@ define i32 @f5(i32 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f6(i32 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: ms %r2, 0(%r3) ; CHECK: br %r14 @@ -69,7 +71,7 @@ define i32 @f6(i32 %a, i32 *%src) { ; Check the high end of the negative aligned MSY range. define i32 @f7(i32 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: msy %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -80,7 +82,7 @@ define i32 @f7(i32 %a, i32 *%src) { ; Check the low end of the MSY range. 
define i32 @f8(i32 %a, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: msy %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -92,7 +94,7 @@ define i32 @f8(i32 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f9(i32 %a, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: ms %r2, 0(%r3) ; CHECK: br %r14 @@ -104,7 +106,7 @@ define i32 @f9(i32 %a, i32 *%src) { ; Check that MS allows an index. define i32 @f10(i32 %a, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ms %r2, 4092({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -117,7 +119,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) { ; Check that MSY allows an index. define i32 @f11(i32 %a, i64 %src, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: msy %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -127,3 +129,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) { %mul = mul i32 %a, %b ret i32 %mul } + +; Check that multiplications of spilled values can use MS rather than MSR. 
+define i32 @f12(i32 *%ptr0) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK: ms %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %ret = call i32 @foo() + + %mul0 = mul i32 %ret, %val0 + %mul1 = mul i32 %mul0, %val1 + %mul2 = mul i32 %mul1, %val2 + %mul3 = mul i32 %mul2, %val3 + %mul4 = mul i32 %mul3, %val4 + %mul5 = mul i32 %mul4, %val5 + %mul6 = mul i32 %mul5, %val6 + %mul7 = mul i32 %mul6, %val7 + %mul8 = mul i32 %mul7, %val8 + %mul9 = mul i32 %mul8, %val9 + + ret i32 %mul9 +} diff --git a/test/CodeGen/SystemZ/int-mul-03.ll b/test/CodeGen/SystemZ/int-mul-03.ll index ab4ef9edd2353..df18050d02420 100644 --- a/test/CodeGen/SystemZ/int-mul-03.ll +++ b/test/CodeGen/SystemZ/int-mul-03.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Check MSGFR. define i64 @f1(i64 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: msgfr %r2, %r3 ; CHECK: br %r14 %bext = sext i32 %b to i64 @@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) { ; Check MSGF with no displacement. define i64 @f2(i64 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: msgf %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) { ; Check the high end of the aligned MSGF range. 
define i64 @f3(i64 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: msgf %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: msgf %r2, 0(%r3) ; CHECK: br %r14 @@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) { ; Check the high end of the negative aligned MSGF range. define i64 @f5(i64 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: msgf %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) { ; Check the low end of the MSGF range. define i64 @f6(i64 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: msgf %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524292 ; CHECK: msgf %r2, 0(%r3) ; CHECK: br %r14 @@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) { ; Check that MSGF allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: msgf %r2, 524284({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %mul = mul i64 %a, %bext ret i64 %mul } + +; Check that multiplications of spilled values can use MSGF rather than MSGFR. 
+define i64 @f9(i32 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: msgf %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = sext i32 %frob0 to i64 + %ext1 = sext i32 %frob1 to i64 + %ext2 = sext i32 %frob2 to i64 + %ext3 = sext i32 %frob3 to i64 + %ext4 = sext i32 %frob4 to i64 + %ext5 = sext i32 %frob5 to i64 + %ext6 = sext i32 %frob6 to i64 + %ext7 = sext i32 %frob7 to i64 + %ext8 = sext i32 %frob8 to i64 + %ext9 = sext i32 %frob9 to i64 + + %mul0 = mul i64 %ret, %ext0 + %mul1 = mul i64 %mul0, %ext1 + %mul2 = mul i64 %mul1, %ext2 + %mul3 = mul i64 %mul2, %ext3 + %mul4 = mul i64 %mul3, %ext4 + %mul5 = mul i64 %mul4, %ext5 + %mul6 = mul i64 %mul5, %ext6 + %mul7 = mul i64 %mul6, 
%ext7 + %mul8 = mul i64 %mul7, %ext8 + %mul9 = mul i64 %mul8, %ext9 + + ret i64 %mul9 +} diff --git a/test/CodeGen/SystemZ/int-mul-04.ll b/test/CodeGen/SystemZ/int-mul-04.ll index 94c263978341f..183a9a748c37a 100644 --- a/test/CodeGen/SystemZ/int-mul-04.ll +++ b/test/CodeGen/SystemZ/int-mul-04.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Check MSGR. define i64 @f1(i64 %a, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: msgr %r2, %r3 ; CHECK: br %r14 %mul = mul i64 %a, %b @@ -13,7 +15,7 @@ define i64 @f1(i64 %a, i64 %b) { ; Check MSG with no displacement. define i64 @f2(i64 %a, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: msg %r2, 0(%r3) ; CHECK: br %r14 %b = load i64 *%src @@ -23,7 +25,7 @@ define i64 @f2(i64 %a, i64 *%src) { ; Check the high end of the aligned MSG range. define i64 @f3(i64 %a, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: msg %r2, 524280(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -35,7 +37,7 @@ define i64 @f3(i64 %a, i64 *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: msg %r2, 0(%r3) ; CHECK: br %r14 @@ -47,7 +49,7 @@ define i64 @f4(i64 %a, i64 *%src) { ; Check the high end of the negative aligned MSG range. define i64 @f5(i64 %a, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: msg %r2, -8(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -58,7 +60,7 @@ define i64 @f5(i64 %a, i64 *%src) { ; Check the low end of the MSG range. define i64 @f6(i64 %a, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: msg %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -70,7 +72,7 @@ define i64 @f6(i64 %a, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. 
; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524296 ; CHECK: msg %r2, 0(%r3) ; CHECK: br %r14 @@ -82,7 +84,7 @@ define i64 @f7(i64 %a, i64 *%src) { ; Check that MSG allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: msg %r2, 524280({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -92,3 +94,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %mul = mul i64 %a, %b ret i64 %mul } + +; Check that multiplications of spilled values can use MSG rather than MSGR. +define i64 @f9(i64 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: msg %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64 *%ptr0, i64 18 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + %val9 = load i64 *%ptr9 + + %ret = call i64 @foo() + + %mul0 = mul i64 %ret, %val0 + %mul1 = mul i64 %mul0, %val1 + %mul2 = mul i64 %mul1, %val2 + %mul3 = mul i64 %mul2, %val3 + %mul4 = mul i64 %mul3, %val4 + %mul5 = mul i64 %mul4, %val5 + %mul6 = mul i64 %mul5, %val6 + %mul7 = mul i64 %mul6, %val7 + %mul8 = mul i64 %mul7, %val8 + %mul9 = mul i64 %mul8, %val9 + + ret i64 %mul9 +} diff --git a/test/CodeGen/SystemZ/int-mul-05.ll b/test/CodeGen/SystemZ/int-mul-05.ll index 5e4031b5d77d6..93f140d84504f 100644 --- a/test/CodeGen/SystemZ/int-mul-05.ll +++ b/test/CodeGen/SystemZ/int-mul-05.ll @@ -4,7 +4,7 @@ ; Check 
multiplication by 2, which should use shifts. define i32 @f1(i32 %a, i32 *%dest) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sll %r2, 1 ; CHECK: br %r14 %mul = mul i32 %a, 2 @@ -13,7 +13,7 @@ define i32 @f1(i32 %a, i32 *%dest) { ; Check multiplication by 3. define i32 @f2(i32 %a, i32 *%dest) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mhi %r2, 3 ; CHECK: br %r14 %mul = mul i32 %a, 3 @@ -22,7 +22,7 @@ define i32 @f2(i32 %a, i32 *%dest) { ; Check the high end of the MHI range. define i32 @f3(i32 %a, i32 *%dest) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: mhi %r2, 32767 ; CHECK: br %r14 %mul = mul i32 %a, 32767 @@ -31,7 +31,7 @@ define i32 @f3(i32 %a, i32 *%dest) { ; Check the next value up, which should use shifts. define i32 @f4(i32 %a, i32 *%dest) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: sll %r2, 15 ; CHECK: br %r14 %mul = mul i32 %a, 32768 @@ -40,7 +40,7 @@ define i32 @f4(i32 %a, i32 *%dest) { ; Check the next value up again, which can use MSFI. define i32 @f5(i32 %a, i32 *%dest) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: msfi %r2, 32769 ; CHECK: br %r14 %mul = mul i32 %a, 32769 @@ -49,7 +49,7 @@ define i32 @f5(i32 %a, i32 *%dest) { ; Check the high end of the MSFI range. define i32 @f6(i32 %a, i32 *%dest) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: msfi %r2, 2147483647 ; CHECK: br %r14 %mul = mul i32 %a, 2147483647 @@ -58,7 +58,7 @@ define i32 @f6(i32 %a, i32 *%dest) { ; Check the next value up, which should use shifts. define i32 @f7(i32 %a, i32 *%dest) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sll %r2, 31 ; CHECK: br %r14 %mul = mul i32 %a, 2147483648 @@ -67,7 +67,7 @@ define i32 @f7(i32 %a, i32 *%dest) { ; Check the next value up again, which is treated as a negative value. define i32 @f8(i32 %a, i32 *%dest) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: msfi %r2, -2147483647 ; CHECK: br %r14 %mul = mul i32 %a, 2147483649 @@ -76,7 +76,7 @@ define i32 @f8(i32 %a, i32 *%dest) { ; Check multiplication by -1, which is a negation. 
define i32 @f9(i32 %a, i32 *%dest) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: lcr %r2, %r2 ; CHECK: br %r14 %mul = mul i32 %a, -1 @@ -85,7 +85,7 @@ define i32 @f9(i32 %a, i32 *%dest) { ; Check multiplication by -2, which should use shifts. define i32 @f10(i32 %a, i32 *%dest) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: sll %r2, 1 ; CHECK: lcr %r2, %r2 ; CHECK: br %r14 @@ -95,7 +95,7 @@ define i32 @f10(i32 %a, i32 *%dest) { ; Check multiplication by -3. define i32 @f11(i32 %a, i32 *%dest) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: mhi %r2, -3 ; CHECK: br %r14 %mul = mul i32 %a, -3 @@ -104,7 +104,7 @@ define i32 @f11(i32 %a, i32 *%dest) { ; Check the lowest useful MHI value. define i32 @f12(i32 %a, i32 *%dest) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: mhi %r2, -32767 ; CHECK: br %r14 %mul = mul i32 %a, -32767 @@ -113,7 +113,7 @@ define i32 @f12(i32 %a, i32 *%dest) { ; Check the next value down, which should use shifts. define i32 @f13(i32 %a, i32 *%dest) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: sll %r2, 15 ; CHECK: lcr %r2, %r2 ; CHECK: br %r14 @@ -123,7 +123,7 @@ define i32 @f13(i32 %a, i32 *%dest) { ; Check the next value down again, which can use MSFI. define i32 @f14(i32 %a, i32 *%dest) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: msfi %r2, -32769 ; CHECK: br %r14 %mul = mul i32 %a, -32769 @@ -132,7 +132,7 @@ define i32 @f14(i32 %a, i32 *%dest) { ; Check the lowest useful MSFI value. define i32 @f15(i32 %a, i32 *%dest) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: msfi %r2, -2147483647 ; CHECK: br %r14 %mul = mul i32 %a, -2147483647 @@ -141,7 +141,7 @@ define i32 @f15(i32 %a, i32 *%dest) { ; Check the next value down, which should use shifts. define i32 @f16(i32 %a, i32 *%dest) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: sll %r2, 31 ; CHECK-NOT: lcr ; CHECK: br %r14 @@ -151,7 +151,7 @@ define i32 @f16(i32 %a, i32 *%dest) { ; Check the next value down again, which is treated as a positive value. 
define i32 @f17(i32 %a, i32 *%dest) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: msfi %r2, 2147483647 ; CHECK: br %r14 %mul = mul i32 %a, -2147483649 diff --git a/test/CodeGen/SystemZ/int-mul-06.ll b/test/CodeGen/SystemZ/int-mul-06.ll index a3546059c0232..ae9f9c6e4db55 100644 --- a/test/CodeGen/SystemZ/int-mul-06.ll +++ b/test/CodeGen/SystemZ/int-mul-06.ll @@ -4,7 +4,7 @@ ; Check multiplication by 2, which should use shifts. define i64 @f1(i64 %a, i64 *%dest) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg %r2, %r2, 1 ; CHECK: br %r14 %mul = mul i64 %a, 2 @@ -13,7 +13,7 @@ define i64 @f1(i64 %a, i64 *%dest) { ; Check multiplication by 3. define i64 @f2(i64 %a, i64 *%dest) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: mghi %r2, 3 ; CHECK: br %r14 %mul = mul i64 %a, 3 @@ -22,7 +22,7 @@ define i64 @f2(i64 %a, i64 *%dest) { ; Check the high end of the MGHI range. define i64 @f3(i64 %a, i64 *%dest) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: mghi %r2, 32767 ; CHECK: br %r14 %mul = mul i64 %a, 32767 @@ -31,7 +31,7 @@ define i64 @f3(i64 %a, i64 *%dest) { ; Check the next value up, which should use shifts. define i64 @f4(i64 %a, i64 *%dest) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: sllg %r2, %r2, 15 ; CHECK: br %r14 %mul = mul i64 %a, 32768 @@ -40,7 +40,7 @@ define i64 @f4(i64 %a, i64 *%dest) { ; Check the next value up again, which can use MSGFI. define i64 @f5(i64 %a, i64 *%dest) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: msgfi %r2, 32769 ; CHECK: br %r14 %mul = mul i64 %a, 32769 @@ -49,7 +49,7 @@ define i64 @f5(i64 %a, i64 *%dest) { ; Check the high end of the MSGFI range. define i64 @f6(i64 %a, i64 *%dest) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: msgfi %r2, 2147483647 ; CHECK: br %r14 %mul = mul i64 %a, 2147483647 @@ -58,7 +58,7 @@ define i64 @f6(i64 %a, i64 *%dest) { ; Check the next value up, which should use shifts. 
define i64 @f7(i64 %a, i64 *%dest) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sllg %r2, %r2, 31 ; CHECK: br %r14 %mul = mul i64 %a, 2147483648 @@ -67,7 +67,7 @@ define i64 @f7(i64 %a, i64 *%dest) { ; Check the next value up again, which cannot use a constant multiplicatoin. define i64 @f8(i64 %a, i64 *%dest) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK-NOT: msgfi ; CHECK: br %r14 %mul = mul i64 %a, 2147483649 @@ -76,7 +76,7 @@ define i64 @f8(i64 %a, i64 *%dest) { ; Check multiplication by -1, which is a negation. define i64 @f9(i64 %a, i64 *%dest) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: lcgr {{%r[0-5]}}, %r2 ; CHECK: br %r14 %mul = mul i64 %a, -1 @@ -85,7 +85,7 @@ define i64 @f9(i64 %a, i64 *%dest) { ; Check multiplication by -2, which should use shifts. define i64 @f10(i64 %a, i64 *%dest) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 1 ; CHECK: lcgr %r2, [[SHIFTED]] ; CHECK: br %r14 @@ -95,7 +95,7 @@ define i64 @f10(i64 %a, i64 *%dest) { ; Check multiplication by -3. define i64 @f11(i64 %a, i64 *%dest) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: mghi %r2, -3 ; CHECK: br %r14 %mul = mul i64 %a, -3 @@ -104,7 +104,7 @@ define i64 @f11(i64 %a, i64 *%dest) { ; Check the lowest useful MGHI value. define i64 @f12(i64 %a, i64 *%dest) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: mghi %r2, -32767 ; CHECK: br %r14 %mul = mul i64 %a, -32767 @@ -113,7 +113,7 @@ define i64 @f12(i64 %a, i64 *%dest) { ; Check the next value down, which should use shifts. define i64 @f13(i64 %a, i64 *%dest) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 15 ; CHECK: lcgr %r2, [[SHIFTED]] ; CHECK: br %r14 @@ -123,7 +123,7 @@ define i64 @f13(i64 %a, i64 *%dest) { ; Check the next value down again, which can use MSGFI. 
define i64 @f14(i64 %a, i64 *%dest) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: msgfi %r2, -32769 ; CHECK: br %r14 %mul = mul i64 %a, -32769 @@ -132,7 +132,7 @@ define i64 @f14(i64 %a, i64 *%dest) { ; Check the lowest useful MSGFI value. define i64 @f15(i64 %a, i64 *%dest) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: msgfi %r2, -2147483647 ; CHECK: br %r14 %mul = mul i64 %a, -2147483647 @@ -141,7 +141,7 @@ define i64 @f15(i64 %a, i64 *%dest) { ; Check the next value down, which should use shifts. define i64 @f16(i64 %a, i64 *%dest) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 31 ; CHECK: lcgr %r2, [[SHIFTED]] ; CHECK: br %r14 @@ -151,7 +151,7 @@ define i64 @f16(i64 %a, i64 *%dest) { ; Check the next value down again, which cannot use constant multiplication define i64 @f17(i64 %a, i64 *%dest) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK-NOT: msgfi ; CHECK: br %r14 %mul = mul i64 %a, -2147483649 diff --git a/test/CodeGen/SystemZ/int-mul-07.ll b/test/CodeGen/SystemZ/int-mul-07.ll index 2459cc359930e..874f43dd398f1 100644 --- a/test/CodeGen/SystemZ/int-mul-07.ll +++ b/test/CodeGen/SystemZ/int-mul-07.ll @@ -7,7 +7,7 @@ ; Check zero-extended multiplication in which only the high part is used. define i32 @f1(i32 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: msgr ; CHECK: br %r14 %ax = zext i32 %a to i64 @@ -20,7 +20,7 @@ define i32 @f1(i32 %a, i32 %b) { ; Check sign-extended multiplication in which only the high part is used. define i32 @f2(i32 %a, i32 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: msgfr ; CHECK: br %r14 %ax = sext i32 %a to i64 @@ -34,7 +34,7 @@ define i32 @f2(i32 %a, i32 %b) { ; Check zero-extended multiplication in which the result is split into ; high and low halves. 
define i32 @f3(i32 %a, i32 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: msgr ; CHECK: br %r14 %ax = zext i32 %a to i64 @@ -50,7 +50,7 @@ define i32 @f3(i32 %a, i32 %b) { ; Check sign-extended multiplication in which the result is split into ; high and low halves. define i32 @f4(i32 %a, i32 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: msgfr ; CHECK: br %r14 %ax = sext i32 %a to i64 diff --git a/test/CodeGen/SystemZ/int-mul-08.ll b/test/CodeGen/SystemZ/int-mul-08.ll index 09ebe7a7b489a..90b26a4f3dde3 100644 --- a/test/CodeGen/SystemZ/int-mul-08.ll +++ b/test/CodeGen/SystemZ/int-mul-08.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Check zero-extended multiplication in which only the high part is used. define i64 @f1(i64 %dummy, i64 %a, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK-NOT: {{%r[234]}} ; CHECK: mlgr %r2, %r4 ; CHECK: br %r14 @@ -19,10 +21,14 @@ define i64 @f1(i64 %dummy, i64 %a, i64 %b) { ; Check sign-extended multiplication in which only the high part is used. ; This needs a rather convoluted sequence. define i64 @f2(i64 %dummy, i64 %a, i64 %b) { -; CHECK: f2: -; CHECK: mlgr -; CHECK: agr -; CHECK: agr +; CHECK-LABEL: f2: +; CHECK-DAG: srag [[RES1:%r[0-5]]], %r3, 63 +; CHECK-DAG: srag [[RES2:%r[0-5]]], %r4, 63 +; CHECK-DAG: ngr [[RES1]], %r4 +; CHECK-DAG: ngr [[RES2]], %r3 +; CHECK-DAG: agr [[RES2]], [[RES1]] +; CHECK-DAG: mlgr %r2, %r4 +; CHECK: sgr %r2, [[RES2]] ; CHECK: br %r14 %ax = sext i64 %a to i128 %bx = sext i64 %b to i128 @@ -35,7 +41,7 @@ define i64 @f2(i64 %dummy, i64 %a, i64 %b) { ; Check zero-extended multiplication in which only part of the high half ; is used. define i64 @f3(i64 %dummy, i64 %a, i64 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: {{%r[234]}} ; CHECK: mlgr %r2, %r4 ; CHECK: srlg %r2, %r2, 3 @@ -51,7 +57,7 @@ define i64 @f3(i64 %dummy, i64 %a, i64 %b) { ; Check zero-extended multiplication in which the result is split into ; high and low halves. 
define i64 @f4(i64 %dummy, i64 %a, i64 %b) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: {{%r[234]}} ; CHECK: mlgr %r2, %r4 ; CHECK: ogr %r2, %r3 @@ -68,7 +74,7 @@ define i64 @f4(i64 %dummy, i64 %a, i64 %b) { ; Check division by a constant, which should use multiplication instead. define i64 @f5(i64 %dummy, i64 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: mlgr %r2, ; CHECK: srlg %r2, %r2, ; CHECK: br %r14 @@ -78,7 +84,7 @@ define i64 @f5(i64 %dummy, i64 %a) { ; Check MLG with no displacement. define i64 @f6(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK-NOT: {{%r[234]}} ; CHECK: mlg %r2, 0(%r4) ; CHECK: br %r14 @@ -93,7 +99,7 @@ define i64 @f6(i64 %dummy, i64 %a, i64 *%src) { ; Check the high end of the aligned MLG range. define i64 @f7(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: mlg %r2, 524280(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -109,7 +115,7 @@ define i64 @f7(i64 %dummy, i64 %a, i64 *%src) { ; Check the next doubleword up, which requires separate address logic. ; Other sequences besides this one would be OK. define i64 @f8(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r4, 524288 ; CHECK: mlg %r2, 0(%r4) ; CHECK: br %r14 @@ -125,7 +131,7 @@ define i64 @f8(i64 %dummy, i64 %a, i64 *%src) { ; Check the high end of the negative aligned MLG range. define i64 @f9(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: mlg %r2, -8(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -140,7 +146,7 @@ define i64 @f9(i64 %dummy, i64 %a, i64 *%src) { ; Check the low end of the MLG range. define i64 @f10(i64 %dummy, i64 %a, i64 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: mlg %r2, -524288(%r4) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -156,7 +162,7 @@ define i64 @f10(i64 %dummy, i64 %a, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. 
; Other sequences besides this one would be OK. define i64 @f11(i64 *%dest, i64 %a, i64 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r4, -524296 ; CHECK: mlg %r2, 0(%r4) ; CHECK: br %r14 @@ -172,7 +178,7 @@ define i64 @f11(i64 *%dest, i64 %a, i64 *%src) { ; Check that MLG allows an index. define i64 @f12(i64 *%dest, i64 %a, i64 %src, i64 %index) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: mlg %r2, 524287(%r5,%r4) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -186,3 +192,77 @@ define i64 @f12(i64 *%dest, i64 %a, i64 %src, i64 %index) { %high = trunc i128 %highx to i64 ret i64 %high } + +; Check that multiplications of spilled values can use MLG rather than MLGR. +define i64 @f13(i64 *%ptr0) { +; CHECK-LABEL: f13: +; CHECK: brasl %r14, foo@PLT +; CHECK: mlg {{%r[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64 *%ptr0, i64 18 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + %val9 = load i64 *%ptr9 + + %ret = call i64 @foo() + + %retx = zext i64 %ret to i128 + %val0x = zext i64 %val0 to i128 + %mul0d = mul i128 %retx, %val0x + %mul0x = lshr i128 %mul0d, 64 + + %val1x = zext i64 %val1 to i128 + %mul1d = mul i128 %mul0x, %val1x + %mul1x = lshr i128 %mul1d, 64 + + %val2x = zext i64 %val2 to i128 + %mul2d = mul i128 %mul1x, %val2x + %mul2x = lshr i128 %mul2d, 64 + + %val3x = zext i64 %val3 to i128 + %mul3d = mul i128 %mul2x, %val3x + %mul3x = lshr i128 %mul3d, 64 + + %val4x = zext i64 %val4 to i128 + %mul4d = mul i128 
%mul3x, %val4x + %mul4x = lshr i128 %mul4d, 64 + + %val5x = zext i64 %val5 to i128 + %mul5d = mul i128 %mul4x, %val5x + %mul5x = lshr i128 %mul5d, 64 + + %val6x = zext i64 %val6 to i128 + %mul6d = mul i128 %mul5x, %val6x + %mul6x = lshr i128 %mul6d, 64 + + %val7x = zext i64 %val7 to i128 + %mul7d = mul i128 %mul6x, %val7x + %mul7x = lshr i128 %mul7d, 64 + + %val8x = zext i64 %val8 to i128 + %mul8d = mul i128 %mul7x, %val8x + %mul8x = lshr i128 %mul8d, 64 + + %val9x = zext i64 %val9 to i128 + %mul9d = mul i128 %mul8x, %val9x + %mul9x = lshr i128 %mul9d, 64 + + %mul9 = trunc i128 %mul9x to i64 + ret i64 %mul9 +} diff --git a/test/CodeGen/SystemZ/int-neg-01.ll b/test/CodeGen/SystemZ/int-neg-01.ll index 6114f4efbc9ac..a342fa7922011 100644 --- a/test/CodeGen/SystemZ/int-neg-01.ll +++ b/test/CodeGen/SystemZ/int-neg-01.ll @@ -4,7 +4,7 @@ ; Test i32->i32 negation. define i32 @f1(i32 %val) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lcr %r2, %r2 ; CHECK: br %r14 %neg = sub i32 0, %val @@ -13,7 +13,7 @@ define i32 @f1(i32 %val) { ; Test i32->i64 negation. define i64 @f2(i32 %val) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lcgfr %r2, %r2 ; CHECK: br %r14 %ext = sext i32 %val to i64 @@ -23,7 +23,7 @@ define i64 @f2(i32 %val) { ; Test i32->i64 negation that uses an "in-register" form of sign extension. define i64 @f3(i64 %val) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lcgfr %r2, %r2 ; CHECK: br %r14 %trunc = trunc i64 %val to i32 @@ -34,7 +34,7 @@ define i64 @f3(i64 %val) { ; Test i64 negation. define i64 @f4(i64 %val) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lcgr %r2, %r2 ; CHECK: br %r14 %neg = sub i64 0, %val diff --git a/test/CodeGen/SystemZ/int-neg-02.ll b/test/CodeGen/SystemZ/int-neg-02.ll new file mode 100644 index 0000000000000..e26194c162d42 --- /dev/null +++ b/test/CodeGen/SystemZ/int-neg-02.ll @@ -0,0 +1,91 @@ +; Test negative integer absolute. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test i32->i32 negative absolute using slt. 
+define i32 @f1(i32 %val) { +; CHECK-LABEL: f1: +; CHECK: lnr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp slt i32 %val, 0 + %neg = sub i32 0, %val + %abs = select i1 %cmp, i32 %neg, i32 %val + %res = sub i32 0, %abs + ret i32 %res +} + +; Test i32->i32 negative absolute using sle. +define i32 @f2(i32 %val) { +; CHECK-LABEL: f2: +; CHECK: lnr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sle i32 %val, 0 + %neg = sub i32 0, %val + %abs = select i1 %cmp, i32 %neg, i32 %val + %res = sub i32 0, %abs + ret i32 %res +} + +; Test i32->i32 negative absolute using sgt. +define i32 @f3(i32 %val) { +; CHECK-LABEL: f3: +; CHECK: lnr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sgt i32 %val, 0 + %neg = sub i32 0, %val + %abs = select i1 %cmp, i32 %val, i32 %neg + %res = sub i32 0, %abs + ret i32 %res +} + +; Test i32->i32 negative absolute using sge. +define i32 @f4(i32 %val) { +; CHECK-LABEL: f4: +; CHECK: lnr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sge i32 %val, 0 + %neg = sub i32 0, %val + %abs = select i1 %cmp, i32 %val, i32 %neg + %res = sub i32 0, %abs + ret i32 %res +} + +; Test i32->i64 negative absolute. +define i64 @f5(i32 %val) { +; CHECK-LABEL: f5: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %ext = sext i32 %val to i64 + %cmp = icmp slt i64 %ext, 0 + %neg = sub i64 0, %ext + %abs = select i1 %cmp, i64 %neg, i64 %ext + %res = sub i64 0, %abs + ret i64 %res +} + +; Test i32->i64 negative absolute that uses an "in-register" form of +; sign extension. +define i64 @f6(i64 %val) { +; CHECK-LABEL: f6: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %trunc = trunc i64 %val to i32 + %ext = sext i32 %trunc to i64 + %cmp = icmp slt i64 %ext, 0 + %neg = sub i64 0, %ext + %abs = select i1 %cmp, i64 %neg, i64 %ext + %res = sub i64 0, %abs + ret i64 %res +} + +; Test i64 negative absolute. 
+define i64 @f7(i64 %val) { +; CHECK-LABEL: f7: +; CHECK: lngr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp slt i64 %val, 0 + %neg = sub i64 0, %val + %abs = select i1 %cmp, i64 %neg, i64 %val + %res = sub i64 0, %abs + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-sub-01.ll b/test/CodeGen/SystemZ/int-sub-01.ll index 9a738148f7ef7..8d1e56ddcabab 100644 --- a/test/CodeGen/SystemZ/int-sub-01.ll +++ b/test/CodeGen/SystemZ/int-sub-01.ll @@ -1,10 +1,13 @@ ; Test 32-bit subtraction. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() ; Check SR. define i32 @f1(i32 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sr %r2, %r3 ; CHECK: br %r14 %sub = sub i32 %a, %b @@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) { ; Check the low end of the S range. define i32 @f2(i32 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: s %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) { ; Check the high end of the aligned S range. define i32 @f3(i32 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: s %r2, 4092(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1023 @@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) { ; Check the next word up, which should use SY instead of S. define i32 @f4(i32 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: sy %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1024 @@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) { ; Check the high end of the aligned SY range. define i32 @f5(i32 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sy %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. 
; Other sequences besides this one would be OK. define i32 @f6(i32 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: s %r2, 0(%r3) ; CHECK: br %r14 @@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) { ; Check the high end of the negative aligned SY range. define i32 @f7(i32 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sy %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) { ; Check the low end of the SY range. define i32 @f8(i32 %a, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sy %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f9(i32 %a, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: s %r2, 0(%r3) ; CHECK: br %r14 @@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) { ; Check that S allows an index. define i32 @f10(i32 %a, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: s %r2, 4092({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) { ; Check that SY allows an index. define i32 @f11(i32 %a, i64 %src, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: sy %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) { %sub = sub i32 %a, %b ret i32 %sub } + +; Check that subtractions of spilled values can use S rather than SR. 
+define i32 @f12(i32 *%ptr0) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK: s %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %ret = call i32 @foo() + + %sub0 = sub i32 %ret, %val0 + %sub1 = sub i32 %sub0, %val1 + %sub2 = sub i32 %sub1, %val2 + %sub3 = sub i32 %sub2, %val3 + %sub4 = sub i32 %sub3, %val4 + %sub5 = sub i32 %sub4, %val5 + %sub6 = sub i32 %sub5, %val6 + %sub7 = sub i32 %sub6, %val7 + %sub8 = sub i32 %sub7, %val8 + %sub9 = sub i32 %sub8, %val9 + + ret i32 %sub9 +} diff --git a/test/CodeGen/SystemZ/int-sub-02.ll b/test/CodeGen/SystemZ/int-sub-02.ll index 5150a960a5546..a1c5ec50ee9c0 100644 --- a/test/CodeGen/SystemZ/int-sub-02.ll +++ b/test/CodeGen/SystemZ/int-sub-02.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Check SGFR. define i64 @f1(i64 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sgfr %r2, %r3 ; CHECK: br %r14 %bext = sext i32 %b to i64 @@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) { ; Check SGF with no displacement. define i64 @f2(i64 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sgf %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) { ; Check the high end of the aligned SGF range. 
define i64 @f3(i64 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: sgf %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: sgf %r2, 0(%r3) ; CHECK: br %r14 @@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) { ; Check the high end of the negative aligned SGF range. define i64 @f5(i64 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sgf %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) { ; Check the low end of the SGF range. define i64 @f6(i64 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sgf %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524292 ; CHECK: sgf %r2, 0(%r3) ; CHECK: br %r14 @@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) { ; Check that SGF allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sgf %r2, 524284({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %sub = sub i64 %a, %bext ret i64 %sub } + +; Check that subtractions of spilled values can use SGF rather than SGFR. 
+define i64 @f9(i32 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: sgf %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = sext i32 %frob0 to i64 + %ext1 = sext i32 %frob1 to i64 + %ext2 = sext i32 %frob2 to i64 + %ext3 = sext i32 %frob3 to i64 + %ext4 = sext i32 %frob4 to i64 + %ext5 = sext i32 %frob5 to i64 + %ext6 = sext i32 %frob6 to i64 + %ext7 = sext i32 %frob7 to i64 + %ext8 = sext i32 %frob8 to i64 + %ext9 = sext i32 %frob9 to i64 + + %sub0 = sub i64 %ret, %ext0 + %sub1 = sub i64 %sub0, %ext1 + %sub2 = sub i64 %sub1, %ext2 + %sub3 = sub i64 %sub2, %ext3 + %sub4 = sub i64 %sub3, %ext4 + %sub5 = sub i64 %sub4, %ext5 + %sub6 = sub i64 %sub5, %ext6 + %sub7 = sub i64 %sub6, %ext7 + 
%sub8 = sub i64 %sub7, %ext8 + %sub9 = sub i64 %sub8, %ext9 + + ret i64 %sub9 +} diff --git a/test/CodeGen/SystemZ/int-sub-03.ll b/test/CodeGen/SystemZ/int-sub-03.ll index 73571b3591f59..44edd84bda4ff 100644 --- a/test/CodeGen/SystemZ/int-sub-03.ll +++ b/test/CodeGen/SystemZ/int-sub-03.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @foo() + ; Check SLGFR. define i64 @f1(i64 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: slgfr %r2, %r3 ; CHECK: br %r14 %bext = zext i32 %b to i64 @@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) { ; Check SLGF with no displacement. define i64 @f2(i64 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: slgf %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) { ; Check the high end of the aligned SLGF range. define i64 @f3(i64 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: slgf %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: slgf %r2, 0(%r3) ; CHECK: br %r14 @@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) { ; Check the high end of the negative aligned SLGF range. define i64 @f5(i64 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: slgf %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) { ; Check the low end of the SLGF range. define i64 @f6(i64 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: slgf %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. 
; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524292 ; CHECK: slgf %r2, 0(%r3) ; CHECK: br %r14 @@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) { ; Check that SLGF allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: slgf %r2, 524284({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %sub = sub i64 %a, %bext ret i64 %sub } + +; Check that subtractions of spilled values can use SLGF rather than SLGFR. +define i64 @f9(i32 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: slgf %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 
*%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = zext i32 %frob0 to i64 + %ext1 = zext i32 %frob1 to i64 + %ext2 = zext i32 %frob2 to i64 + %ext3 = zext i32 %frob3 to i64 + %ext4 = zext i32 %frob4 to i64 + %ext5 = zext i32 %frob5 to i64 + %ext6 = zext i32 %frob6 to i64 + %ext7 = zext i32 %frob7 to i64 + %ext8 = zext i32 %frob8 to i64 + %ext9 = zext i32 %frob9 to i64 + + %sub0 = sub i64 %ret, %ext0 + %sub1 = sub i64 %sub0, %ext1 + %sub2 = sub i64 %sub1, %ext2 + %sub3 = sub i64 %sub2, %ext3 + %sub4 = sub i64 %sub3, %ext4 + %sub5 = sub i64 %sub4, %ext5 + %sub6 = sub i64 %sub5, %ext6 + %sub7 = sub i64 %sub6, %ext7 + %sub8 = sub i64 %sub7, %ext8 + %sub9 = sub i64 %sub8, %ext9 + + ret i64 %sub9 +} diff --git a/test/CodeGen/SystemZ/int-sub-04.ll b/test/CodeGen/SystemZ/int-sub-04.ll index 545d342168097..85104536c5d74 100644 --- a/test/CodeGen/SystemZ/int-sub-04.ll +++ b/test/CodeGen/SystemZ/int-sub-04.ll @@ -1,10 +1,13 @@ ; Test 64-bit subtraction in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo() ; Check SGR. define i64 @f1(i64 %a, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sgr %r2, %r3 ; CHECK: br %r14 %sub = sub i64 %a, %b @@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) { ; Check SG with no displacement. define i64 @f2(i64 %a, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sg %r2, 0(%r3) ; CHECK: br %r14 %b = load i64 *%src @@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) { ; Check the high end of the aligned SG range. define i64 @f3(i64 %a, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: sg %r2, 524280(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) { ; Check the next doubleword up, which needs separate address logic. 
; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: sg %r2, 0(%r3) ; CHECK: br %r14 @@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) { ; Check the high end of the negative aligned SG range. define i64 @f5(i64 %a, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sg %r2, -8(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) { ; Check the low end of the SG range. define i64 @f6(i64 %a, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sg %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524296 ; CHECK: sg %r2, 0(%r3) ; CHECK: br %r14 @@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) { ; Check that SG allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sg %r2, 524280({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %sub = sub i64 %a, %b ret i64 %sub } + +; Check that subtractions of spilled values can use SG rather than SGR. 
+define i64 @f9(i64 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: sg %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64 *%ptr0, i64 18 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + %val9 = load i64 *%ptr9 + + %ret = call i64 @foo() + + %sub0 = sub i64 %ret, %val0 + %sub1 = sub i64 %sub0, %val1 + %sub2 = sub i64 %sub1, %val2 + %sub3 = sub i64 %sub2, %val3 + %sub4 = sub i64 %sub3, %val4 + %sub5 = sub i64 %sub4, %val5 + %sub6 = sub i64 %sub5, %val6 + %sub7 = sub i64 %sub6, %val7 + %sub8 = sub i64 %sub7, %val8 + %sub9 = sub i64 %sub8, %val9 + + ret i64 %sub9 +} diff --git a/test/CodeGen/SystemZ/int-sub-05.ll b/test/CodeGen/SystemZ/int-sub-05.ll index 1475b244f6786..85ea14cd15f35 100644 --- a/test/CodeGen/SystemZ/int-sub-05.ll +++ b/test/CodeGen/SystemZ/int-sub-05.ll @@ -1,10 +1,13 @@ -; Test 128-bit addition in which the second operand is variable. +; Test 128-bit subtraction in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i128 *@foo() ; Test register addition. 
define void @f1(i128 *%ptr, i64 %high, i64 %low) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: slgr {{%r[0-5]}}, %r4 ; CHECK: slbgr {{%r[0-5]}}, %r3 ; CHECK: br %r14 @@ -20,7 +23,7 @@ define void @f1(i128 *%ptr, i64 %high, i64 %low) { ; Test memory addition with no offset. define void @f2(i64 %addr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: slg {{%r[0-5]}}, 8(%r2) ; CHECK: slbg {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 @@ -35,7 +38,7 @@ define void @f2(i64 %addr) { ; Test the highest aligned offset that is in range of both SLG and SLBG. define void @f3(i64 %base) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: slg {{%r[0-5]}}, 524280(%r2) ; CHECK: slbg {{%r[0-5]}}, 524272(%r2) ; CHECK: br %r14 @@ -51,7 +54,7 @@ define void @f3(i64 %base) { ; Test the next doubleword up, which requires separate address logic for SLG. define void @f4(i64 %base) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lgr [[BASE:%r[1-5]]], %r2 ; CHECK: agfi [[BASE]], 524288 ; CHECK: slg {{%r[0-5]}}, 0([[BASE]]) @@ -71,7 +74,7 @@ define void @f4(i64 %base) { ; both instructions. It would be better to create an anchor at 524288 ; that both instructions can use, but that isn't implemented yet. define void @f5(i64 %base) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: slg {{%r[0-5]}}, 0({{%r[1-5]}}) ; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}}) ; CHECK: br %r14 @@ -87,7 +90,7 @@ define void @f5(i64 %base) { ; Test the lowest displacement that is in range of both SLG and SLBG. define void @f6(i64 %base) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: slg {{%r[0-5]}}, -524280(%r2) ; CHECK: slbg {{%r[0-5]}}, -524288(%r2) ; CHECK: br %r14 @@ -103,7 +106,7 @@ define void @f6(i64 %base) { ; Test the next doubleword down, which is out of range of the SLBG. 
define void @f7(i64 %base) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: slg {{%r[0-5]}}, -524288(%r2) ; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}}) ; CHECK: br %r14 @@ -116,3 +119,35 @@ define void @f7(i64 %base) { store i128 %sub, i128 *%aptr ret void } + +; Check that subtractions of spilled values can use SLG and SLBG rather than +; SLGR and SLBGR. +define void @f8(i128 *%ptr0) { +; CHECK-LABEL: f8: +; CHECK: brasl %r14, foo@PLT +; CHECK: slg {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: slbg {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i128 *%ptr0, i128 2 + %ptr2 = getelementptr i128 *%ptr0, i128 4 + %ptr3 = getelementptr i128 *%ptr0, i128 6 + %ptr4 = getelementptr i128 *%ptr0, i128 8 + + %val0 = load i128 *%ptr0 + %val1 = load i128 *%ptr1 + %val2 = load i128 *%ptr2 + %val3 = load i128 *%ptr3 + %val4 = load i128 *%ptr4 + + %retptr = call i128 *@foo() + + %ret = load i128 *%retptr + %sub0 = sub i128 %ret, %val0 + %sub1 = sub i128 %sub0, %val1 + %sub2 = sub i128 %sub1, %val2 + %sub3 = sub i128 %sub2, %val3 + %sub4 = sub i128 %sub3, %val4 + store i128 %sub4, i128 *%retptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-sub-06.ll b/test/CodeGen/SystemZ/int-sub-06.ll index 0e04d51e2bc74..395d584b23dec 100644 --- a/test/CodeGen/SystemZ/int-sub-06.ll +++ b/test/CodeGen/SystemZ/int-sub-06.ll @@ -5,7 +5,7 @@ ; Check register additions. The XOR ensures that we don't instead zero-extend ; %b into a register and use memory addition. define void @f1(i128 *%aptr, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: slgfr {{%r[0-5]}}, %r3 ; CHECK: slbgr ; CHECK: br %r14 @@ -19,7 +19,7 @@ define void @f1(i128 *%aptr, i32 %b) { ; Like f1, but using an "in-register" extension. 
define void @f2(i128 *%aptr, i64 %b) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: slgfr {{%r[0-5]}}, %r3 ; CHECK: slbgr ; CHECK: br %r14 @@ -35,7 +35,7 @@ define void @f2(i128 *%aptr, i64 %b) { ; Test register addition in cases where the second operand is zero extended ; from i64 rather than i32, but is later masked to i32 range. define void @f3(i128 *%aptr, i64 %b) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: slgfr {{%r[0-5]}}, %r3 ; CHECK: slbgr ; CHECK: br %r14 @@ -50,7 +50,7 @@ define void @f3(i128 *%aptr, i64 %b) { ; Test SLGF with no offset. define void @f4(i128 *%aptr, i32 *%bsrc) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: slgf {{%r[0-5]}}, 0(%r3) ; CHECK: slbgr ; CHECK: br %r14 @@ -65,7 +65,7 @@ define void @f4(i128 *%aptr, i32 *%bsrc) { ; Check the high end of the SLGF range. define void @f5(i128 *%aptr, i32 *%bsrc) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: slgf {{%r[0-5]}}, 524284(%r3) ; CHECK: slbgr ; CHECK: br %r14 @@ -82,7 +82,7 @@ define void @f5(i128 *%aptr, i32 *%bsrc) { ; Check the next word up, which must use separate address logic. ; Other sequences besides this one would be OK. define void @f6(i128 *%aptr, i32 *%bsrc) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: slgf {{%r[0-5]}}, 0(%r3) ; CHECK: slbgr @@ -99,7 +99,7 @@ define void @f6(i128 *%aptr, i32 *%bsrc) { ; Check the high end of the negative aligned SLGF range. define void @f7(i128 *%aptr, i32 *%bsrc) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: slgf {{%r[0-5]}}, -4(%r3) ; CHECK: slbgr ; CHECK: br %r14 @@ -115,7 +115,7 @@ define void @f7(i128 *%aptr, i32 *%bsrc) { ; Check the low end of the SLGF range. define void @f8(i128 *%aptr, i32 *%bsrc) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: slgf {{%r[0-5]}}, -524288(%r3) ; CHECK: slbgr ; CHECK: br %r14 @@ -132,7 +132,7 @@ define void @f8(i128 *%aptr, i32 *%bsrc) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define void @f9(i128 *%aptr, i32 *%bsrc) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: slgf {{%r[0-5]}}, 0(%r3) ; CHECK: slbgr @@ -149,7 +149,7 @@ define void @f9(i128 *%aptr, i32 *%bsrc) { ; Check that SLGF allows an index. define void @f10(i128 *%aptr, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: slgf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %a = load i128 *%aptr diff --git a/test/CodeGen/SystemZ/int-sub-07.ll b/test/CodeGen/SystemZ/int-sub-07.ll new file mode 100644 index 0000000000000..5c1f42c1cc969 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-07.ll @@ -0,0 +1,131 @@ +; Test 32-bit subtraction in which the second operand is a sign-extended +; i16 memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the SH range. +define i32 @f1(i32 %lhs, i16 *%src) { +; CHECK-LABEL: f1: +; CHECK: sh %r2, 0(%r3) +; CHECK: br %r14 + %half = load i16 *%src + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the aligned SH range. +define i32 @f2(i32 %lhs, i16 *%src) { +; CHECK-LABEL: f2: +; CHECK: sh %r2, 4094(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2047 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword up, which should use SHY instead of SH. +define i32 @f3(i32 %lhs, i16 *%src) { +; CHECK-LABEL: f3: +; CHECK: shy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2048 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the aligned SHY range. 
+define i32 @f4(i32 %lhs, i16 *%src) { +; CHECK-LABEL: f4: +; CHECK: shy %r2, 524286(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f5(i32 %lhs, i16 *%src) { +; CHECK-LABEL: f5: +; CHECK: agfi %r3, 524288 +; CHECK: sh %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the negative aligned SHY range. +define i32 @f6(i32 %lhs, i16 *%src) { +; CHECK-LABEL: f6: +; CHECK: shy %r2, -2(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the low end of the SHY range. +define i32 @f7(i32 %lhs, i16 *%src) { +; CHECK-LABEL: f7: +; CHECK: shy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f8(i32 %lhs, i16 *%src) { +; CHECK-LABEL: f8: +; CHECK: agfi %r3, -524290 +; CHECK: sh %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check that SH allows an index. 
+define i32 @f9(i32 %lhs, i64 %src, i64 %index) { +; CHECK-LABEL: f9: +; CHECK: sh %r2, 4094({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %sub1 = add i64 %src, %index + %sub2 = add i64 %sub1, 4094 + %ptr = inttoptr i64 %sub2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check that SHY allows an index. +define i32 @f10(i32 %lhs, i64 %src, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: shy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %sub1 = add i64 %src, %index + %sub2 = add i64 %sub1, 4096 + %ptr = inttoptr i64 %sub2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-sub-08.ll b/test/CodeGen/SystemZ/int-sub-08.ll new file mode 100644 index 0000000000000..f0a5e1e063a16 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-08.ll @@ -0,0 +1,39 @@ +; Test the three-operand forms of subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check SRK. +define i32 @f1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f1: +; CHECK: srk %r2, %r3, %r4 +; CHECK: br %r14 + %sub = sub i32 %b, %c + ret i32 %sub +} + +; Check that we can still use SR in obvious cases. +define i32 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: sr %r2, %r3 +; CHECK: br %r14 + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check SGRK. +define i64 @f3(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: f3: +; CHECK: sgrk %r2, %r3, %r4 +; CHECK: br %r14 + %sub = sub i64 %b, %c + ret i64 %sub +} + +; Check that we can still use SGR in obvious cases. 
+define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: sgr %r2, %r3 +; CHECK: br %r14 + %sub = sub i64 %a, %b + ret i64 %sub +} diff --git a/test/CodeGen/SystemZ/int-sub-09.ll b/test/CodeGen/SystemZ/int-sub-09.ll new file mode 100644 index 0000000000000..00a60d3819c95 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-09.ll @@ -0,0 +1,22 @@ +; Test 128-bit subtraction when the distinct-operands facility is available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test the case where both operands are in registers. +define i64 @f1(i64 %a, i64 %b, i64 %c, i64 %d, i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: slgrk %r2, %r4, %r5 +; CHECK: slbgr +; CHECK: br %r14 + %x1 = insertelement <2 x i64> undef, i64 %b, i32 0 + %x2 = insertelement <2 x i64> %x1, i64 %c, i32 1 + %x = bitcast <2 x i64> %x2 to i128 + %y2 = insertelement <2 x i64> %x1, i64 %d, i32 1 + %y = bitcast <2 x i64> %y2 to i128 + %sub = sub i128 %x, %y + %subv = bitcast i128 %sub to <2 x i64> + %high = extractelement <2 x i64> %subv, i32 0 + store i64 %high, i64 *%ptr + %low = extractelement <2 x i64> %subv, i32 1 + ret i64 %low +} diff --git a/test/CodeGen/SystemZ/la-01.ll b/test/CodeGen/SystemZ/la-01.ll index b43e3f8662dc7..31d2041251708 100644 --- a/test/CodeGen/SystemZ/la-01.ll +++ b/test/CodeGen/SystemZ/la-01.ll @@ -15,9 +15,11 @@ define void @df() { ret void } +declare void @foo(i32 *) + ; Test a load of a fully-aligned external variable. define i32 *@f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: larl %r2, e4 ; CHECK-NEXT: br %r14 ret i32 *@e4 @@ -25,7 +27,7 @@ define i32 *@f1() { ; Test a load of a fully-aligned local variable. define i32 *@f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: larl %r2, d4 ; CHECK-NEXT: br %r14 ret i32 *@d4 @@ -33,7 +35,7 @@ define i32 *@f2() { ; Test a load of a 2-byte-aligned external variable. 
define i32 *@f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: larl %r2, e2 ; CHECK-NEXT: br %r14 ret i32 *@e2 @@ -41,7 +43,7 @@ define i32 *@f3() { ; Test a load of a 2-byte-aligned local variable. define i32 *@f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: larl %r2, d2 ; CHECK-NEXT: br %r14 ret i32 *@d2 @@ -49,7 +51,7 @@ define i32 *@f4() { ; Test a load of an unaligned external variable, which must go via the GOT. define i32 *@f5() { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lgrl %r2, e1@GOT ; CHECK-NEXT: br %r14 ret i32 *@e1 @@ -57,7 +59,7 @@ define i32 *@f5() { ; Test a load of an unaligned local variable, which must go via the GOT. define i32 *@f6() { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lgrl %r2, d1@GOT ; CHECK-NEXT: br %r14 ret i32 *@d1 @@ -65,7 +67,7 @@ define i32 *@f6() { ; Test a load of an external function. define void() *@f7() { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: larl %r2, ef ; CHECK-NEXT: br %r14 ret void() *@ef @@ -73,8 +75,21 @@ define void() *@f7() { ; Test a load of a local function. define void() *@f8() { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: larl %r2, df ; CHECK-NEXT: br %r14 ret void() *@df } + +; Test that LARL can be rematerialized. +define i32 @f9() { +; CHECK-LABEL: f9: +; CHECK: larl %r2, d2 +; CHECK: brasl %r14, foo@PLT +; CHECK: larl %r2, d2 +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + call void @foo(i32 *@d2) + call void @foo(i32 *@d2) + ret i32 0 +} diff --git a/test/CodeGen/SystemZ/la-02.ll b/test/CodeGen/SystemZ/la-02.ll index 4c5374a0925bb..d7362d67e3bf6 100644 --- a/test/CodeGen/SystemZ/la-02.ll +++ b/test/CodeGen/SystemZ/la-02.ll @@ -23,7 +23,7 @@ define hidden void @hf() { ; Test loads of external variables. There is no guarantee that the ; variable will be in range of LARL. define i32 *@f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lgrl %r2, ev@GOT ; CHECK: br %r14 ret i32 *@ev @@ -31,7 +31,7 @@ define i32 *@f1() { ; ...likewise locally-defined normal-visibility variables. 
define i32 *@f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lgrl %r2, dv@GOT ; CHECK: br %r14 ret i32 *@dv @@ -39,7 +39,7 @@ define i32 *@f2() { ; ...likewise protected variables. define i32 *@f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: lgrl %r2, pv@GOT ; CHECK: br %r14 ret i32 *@pv @@ -47,7 +47,7 @@ define i32 *@f3() { ; ...likewise hidden variables. define i32 *@f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: lgrl %r2, hv@GOT ; CHECK: br %r14 ret i32 *@hv @@ -56,7 +56,7 @@ define i32 *@f4() { ; Check loads of external functions. This could use LARL, but we don't have ; code to detect that yet. define void() *@f5() { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lgrl %r2, ef@GOT ; CHECK: br %r14 ret void() *@ef @@ -64,7 +64,7 @@ define void() *@f5() { ; ...likewise locally-defined normal-visibility functions. define void() *@f6() { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lgrl %r2, df@GOT ; CHECK: br %r14 ret void() *@df @@ -72,7 +72,7 @@ define void() *@f6() { ; ...likewise protected functions. define void() *@f7() { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: lgrl %r2, pf@GOT ; CHECK: br %r14 ret void() *@pf @@ -80,7 +80,7 @@ define void() *@f7() { ; ...likewise hidden functions. define void() *@f8() { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: lgrl %r2, hf@GOT ; CHECK: br %r14 ret void() *@hf diff --git a/test/CodeGen/SystemZ/la-03.ll b/test/CodeGen/SystemZ/la-03.ll index 9449b2bfbec0c..1ff3fefde6c93 100644 --- a/test/CodeGen/SystemZ/la-03.ll +++ b/test/CodeGen/SystemZ/la-03.ll @@ -20,7 +20,7 @@ define hidden void @hf() { ; Test loads of external variables, which must go via the GOT. define i32 *@f1() { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: lgrl %r2, ev@GOT ; CHECK: br %r14 ret i32 *@ev @@ -29,7 +29,7 @@ define i32 *@f1() { ; Check loads of locally-defined normal-visibility variables, which might ; be overridden. The load must go via the GOT. 
define i32 *@f2() { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: lgrl %r2, dv@GOT ; CHECK: br %r14 ret i32 *@dv @@ -38,7 +38,7 @@ define i32 *@f2() { ; Check loads of protected variables, which in the small code model ; must be in range of LARL. define i32 *@f3() { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: larl %r2, pv ; CHECK: br %r14 ret i32 *@pv @@ -46,7 +46,7 @@ define i32 *@f3() { ; ...likewise hidden variables. define i32 *@f4() { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: larl %r2, hv ; CHECK: br %r14 ret i32 *@hv @@ -54,7 +54,7 @@ define i32 *@f4() { ; Like f1, but for functions. define void() *@f5() { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: lgrl %r2, ef@GOT ; CHECK: br %r14 ret void() *@ef @@ -62,7 +62,7 @@ define void() *@f5() { ; Like f2, but for functions. define void() *@f6() { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: lgrl %r2, df@GOT ; CHECK: br %r14 ret void() *@df @@ -70,7 +70,7 @@ define void() *@f6() { ; Like f3, but for functions. define void() *@f7() { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: larl %r2, pf ; CHECK: br %r14 ret void() *@pf @@ -78,7 +78,7 @@ define void() *@f7() { ; Like f4, but for functions. define void() *@f8() { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: larl %r2, hf ; CHECK: br %r14 ret void() *@hf diff --git a/test/CodeGen/SystemZ/la-04.ll b/test/CodeGen/SystemZ/la-04.ll index 4c3636481e7d7..4d47308e04ba6 100644 --- a/test/CodeGen/SystemZ/la-04.ll +++ b/test/CodeGen/SystemZ/la-04.ll @@ -4,7 +4,7 @@ ; Do some arbitrary work and return the address of the following label. 
define i8 *@f1(i8 *%addr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: mvi 0(%r2), 1 ; CHECK: [[LABEL:\.L.*]]: ; CHECK: larl %r2, [[LABEL]] diff --git a/test/CodeGen/SystemZ/lit.local.cfg b/test/CodeGen/SystemZ/lit.local.cfg index 79528d178f231..b12af09434bea 100644 --- a/test/CodeGen/SystemZ/lit.local.cfg +++ b/test/CodeGen/SystemZ/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'SystemZ' in targets: config.unsupported = True diff --git a/test/CodeGen/SystemZ/loop-01.ll b/test/CodeGen/SystemZ/loop-01.ll new file mode 100644 index 0000000000000..580080173563e --- /dev/null +++ b/test/CodeGen/SystemZ/loop-01.ll @@ -0,0 +1,124 @@ +; Test loop tuning. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; Test that strength reduction is applied to addresses with a scale factor, +; but that indexed addressing can still be used. +define void @f1(i32 *%dest, i32 %a) { +; CHECK-LABEL: f1: +; CHECK-NOT: sllg +; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}}) +; CHECK: br %r14 +entry: + br label %loop + +loop: + %index = phi i64 [ 0, %entry ], [ %next, %loop ] + %ptr = getelementptr i32 *%dest, i64 %index + store i32 %a, i32 *%ptr + %next = add i64 %index, 1 + %cmp = icmp ne i64 %next, 100 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +; Test a loop that should be converted into dbr form and then use BRCT. 
+define void @f2(i32 *%src, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK: lhi [[REG:%r[0-5]]], 100 +; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop +; CHECK: brct [[REG]], [[LABEL]] +; CHECK: br %r14 +entry: + br label %loop + +loop: + %count = phi i32 [ 0, %entry ], [ %next, %loop.next ] + %next = add i32 %count, 1 + %val = load volatile i32 *%src + %cmp = icmp eq i32 %val, 0 + br i1 %cmp, label %loop.next, label %loop.store + +loop.store: + %add = add i32 %val, 1 + store volatile i32 %add, i32 *%dest + br label %loop.next + +loop.next: + %cont = icmp ne i32 %next, 100 + br i1 %cont, label %loop, label %exit + +exit: + ret void +} + +; Like f2, but for BRCTG. +define void @f3(i64 *%src, i64 *%dest) { +; CHECK-LABEL: f3: +; CHECK: lghi [[REG:%r[0-5]]], 100 +; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop +; CHECK: brctg [[REG]], [[LABEL]] +; CHECK: br %r14 +entry: + br label %loop + +loop: + %count = phi i64 [ 0, %entry ], [ %next, %loop.next ] + %next = add i64 %count, 1 + %val = load volatile i64 *%src + %cmp = icmp eq i64 %val, 0 + br i1 %cmp, label %loop.next, label %loop.store + +loop.store: + %add = add i64 %val, 1 + store volatile i64 %add, i64 *%dest + br label %loop.next + +loop.next: + %cont = icmp ne i64 %next, 100 + br i1 %cont, label %loop, label %exit + +exit: + ret void +} + +; Test a loop with a 64-bit decremented counter in which the 32-bit +; low part of the counter is used after the decrement. This is an example +; of a subregister use being the only thing that blocks a conversion to BRCTG. 
+define void @f4(i32 *%src, i32 *%dest, i64 *%dest2, i64 %count) { +; CHECK-LABEL: f4: +; CHECK: aghi [[REG:%r[0-5]]], -1 +; CHECK: lr [[REG2:%r[0-5]]], [[REG]] +; CHECK: stg [[REG2]], +; CHECK: jne {{\..*}} +; CHECK: br %r14 +entry: + br label %loop + +loop: + %left = phi i64 [ %count, %entry ], [ %next, %loop.next ] + store volatile i64 %left, i64 *%dest2 + %val = load volatile i32 *%src + %cmp = icmp eq i32 %val, 0 + br i1 %cmp, label %loop.next, label %loop.store + +loop.store: + %add = add i32 %val, 1 + store volatile i32 %add, i32 *%dest + br label %loop.next + +loop.next: + %next = add i64 %left, -1 + %ext = zext i32 %val to i64 + %shl = shl i64 %ext, 32 + %and = and i64 %next, 4294967295 + %or = or i64 %shl, %and + store volatile i64 %or, i64 *%dest2 + %cont = icmp ne i64 %next, 0 + br i1 %cont, label %loop, label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/memchr-01.ll b/test/CodeGen/SystemZ/memchr-01.ll new file mode 100644 index 0000000000000..c51690b9848d5 --- /dev/null +++ b/test/CodeGen/SystemZ/memchr-01.ll @@ -0,0 +1,21 @@ +; Test memchr using SRST, with a weird but usable prototype. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@memchr(i8 *%src, i16 %char, i32 %len) + +; Test a simple forwarded call. +define i8 *@f1(i8 *%src, i16 %char, i32 %len) { +; CHECK-LABEL: f1: +; CHECK-DAG: lgr [[REG:%r[1-5]]], %r2 +; CHECK-DAG: algfr %r2, %r4 +; CHECK-DAG: llcr %r0, %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK: jl {{\.L.*}} +; CHECK: lghi %r2, 0 +; CHECK: br %r14 + %res = call i8 *@memchr(i8 *%src, i16 %char, i32 %len) + ret i8 *%res +} diff --git a/test/CodeGen/SystemZ/memchr-02.ll b/test/CodeGen/SystemZ/memchr-02.ll new file mode 100644 index 0000000000000..982b3964f190f --- /dev/null +++ b/test/CodeGen/SystemZ/memchr-02.ll @@ -0,0 +1,57 @@ +; Test memchr using SRST, with the correct prototype. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@memchr(i8 *%src, i32 %char, i64 %len) + +; Test a simple forwarded call. +define i8 *@f1(i64 %len, i8 *%src, i32 %char) { +; CHECK-LABEL: f1: +; CHECK-DAG: agr %r2, %r3 +; CHECK-DAG: llcr %r0, %r4 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: srst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK: jl {{\.L.*}} +; CHECK: lghi %r2, 0 +; CHECK: br %r14 + %res = call i8 *@memchr(i8 *%src, i32 %char, i64 %len) + ret i8 *%res +} + +; Test a doubled call with no use of %r0 in between. There should be a +; single load of %r0. +define i8 *@f2(i8 *%src, i8 *%charptr, i64 %len) { +; CHECK-LABEL: f2: +; CHECK: llc %r0, 0(%r3) +; CHECK-NOT: %r0 +; CHECK: srst [[RES1:%r[1-5]]], %r2 +; CHECK-NOT: %r0 +; CHECK: srst %r2, [[RES1]] +; CHECK: br %r14 + %char = load volatile i8 *%charptr + %charext = zext i8 %char to i32 + %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len) + %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len) + ret i8 *%res2 +} + +; Test a doubled call with a use of %r0 in between. %r0 must be loaded +; for each loop. +define i8 *@f3(i8 *%src, i8 *%charptr, i64 %len) { +; CHECK-LABEL: f3: +; CHECK: llc [[CHAR:%r[1-5]]], 0(%r3) +; CHECK: lr %r0, [[CHAR]] +; CHECK: srst [[RES1:%r[1-5]]], %r2 +; CHECK: lhi %r0, 0 +; CHECK: blah %r0 +; CHECK: lr %r0, [[CHAR]] +; CHECK: srst %r2, [[RES1]] +; CHECK: br %r14 + %char = load volatile i8 *%charptr + %charext = zext i8 %char to i32 + %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len) + call void asm sideeffect "blah $0", "{r0}" (i32 0) + %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len) + ret i8 *%res2 +} diff --git a/test/CodeGen/SystemZ/memcmp-01.ll b/test/CodeGen/SystemZ/memcmp-01.ll new file mode 100644 index 0000000000000..a01441946937a --- /dev/null +++ b/test/CodeGen/SystemZ/memcmp-01.ll @@ -0,0 +1,221 @@ +; Test memcmp using CLC, with i32 results. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size) + +; Zero-length comparisons should be optimized away. +define i32 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lhi %r2, 0 +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0) + ret i32 %res +} + +; Check a case where the result is used as an integer. +define i32 @f2(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f2: +; CHECK: clc 0(2,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll %r2, [[REG]], 31 +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2) + ret i32 %res +} + +; Check a case where the result is tested for equality. +define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK: clc 0(3,%r2), 0(%r3) +; CHECK-NEXT: je {{\..*}} +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for inequality. +define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f4: +; CHECK: clc 0(4,%r2), 0(%r3) +; CHECK-NEXT: jlh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4) + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested via slt. +define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f5: +; CHECK: clc 0(5,%r2), 0(%r3) +; CHECK-NEXT: jl {{\..*}} +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5) + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for sgt. 
+define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f6: +; CHECK: clc 0(6,%r2), 0(%r3) +; CHECK-NEXT: jh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6) + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check the upper end of the CLC range. Here the result is used both as +; an integer and for branching. +define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f7: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll %r2, [[REG]], 31 +; CHECK: jl {{.L*}} +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256) + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; 257 bytes needs two CLCs. +define i32 @f8(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f8: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(1,%r2), 256(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257) + ret i32 %res +} + +; Test a comparison of 258 bytes in which the CC result can be used directly. +define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f9: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(1,%r2), 256(%r3) +; CHECK: [[LABEL]]: +; CHECK-NEXT: jl .L +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257) + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Test the largest size that can use two CLCs. 
+define i32 @f10(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f10: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(256,%r2), 256(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512) + ret i32 %res +} + +; Test the smallest size that needs 3 CLCs. +define i32 @f11(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f11: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(256,%r2), 256(%r3) +; CHECK: jlh [[LABEL]] +; CHECK: clc 512(1,%r2), 512(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513) + ret i32 %res +} + +; Test the largest size than can use 3 CLCs. +define i32 @f12(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f12: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(256,%r2), 256(%r3) +; CHECK: jlh [[LABEL]] +; CHECK: clc 512(256,%r2), 512(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 768) + ret i32 %res +} + +; The next size up uses a loop instead. We leave the more complicated +; loop tests to memcpy-01.ll, which shares the same form. +define i32 @f13(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f13: +; CHECK: lghi [[COUNT:%r[0-5]]], 3 +; CHECK: [[LOOP:.L[^:]*]]: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK-DAG: la %r2, 256(%r2) +; CHECK-DAG: la %r3, 256(%r3) +; CHECK: brctg [[COUNT]], [[LOOP]] +; CHECK: clc 0(1,%r2), 0(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769) + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/memcmp-02.ll b/test/CodeGen/SystemZ/memcmp-02.ll new file mode 100644 index 0000000000000..74b090dcdd8e5 --- /dev/null +++ b/test/CodeGen/SystemZ/memcmp-02.ll @@ -0,0 +1,139 @@ +; Test memcmp using CLC, with i64 results. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @memcmp(i8 *%src1, i8 *%src2, i64 %size) + +; Zero-length comparisons should be optimized away. +define i64 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lghi %r2, 0 +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 0) + ret i64 %res +} + +; Check a case where the result is used as an integer. +define i64 @f2(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f2: +; CHECK: clc 0(2,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 2) + ret i64 %res +} + +; Check a case where the result is tested for equality. +define void @f3(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f3: +; CHECK: clc 0(3,%r2), 0(%r3) +; CHECK-NEXT: je {{\..*}} +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 3) + %cmp = icmp eq i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for inequality. +define void @f4(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f4: +; CHECK: clc 0(4,%r2), 0(%r3) +; CHECK-NEXT: jlh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 4) + %cmp = icmp ne i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested via slt. +define void @f5(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f5: +; CHECK: clc 0(5,%r2), 0(%r3) +; CHECK-NEXT: jl {{\..*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 5) + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for sgt. 
+define void @f6(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f6: +; CHECK: clc 0(6,%r2), 0(%r3) +; CHECK-NEXT: jh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 6) + %cmp = icmp sgt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check the upper end of the CLC range. Here the result is used both as +; an integer and for branching. +define i64 @f7(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f7: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: jl {{.L*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 256) + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; 257 bytes needs two CLCs. +define i64 @f8(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f8: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(1,%r2), 256(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 257) + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/memcpy-01.ll b/test/CodeGen/SystemZ/memcpy-01.ll new file mode 100644 index 0000000000000..b53ec5452e25c --- /dev/null +++ b/test/CodeGen/SystemZ/memcpy-01.ll @@ -0,0 +1,235 @@ +; Test memcpy using MVC. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *nocapture, i8 *nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i32, i1) nounwind +declare void @foo(i8 *, i8 *) + +; Test a no-op move, i32 version. 
+define void @f1(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK-NOT: %r3 +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 0, i32 1, + i1 false) + ret void +} + +; Test a no-op move, i64 version. +define void @f2(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK-NOT: %r3 +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 0, i32 1, + i1 false) + ret void +} + +; Test a 1-byte move, i32 version. +define void @f3(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f3: +; CHECK: mvc 0(1,%r2), 0(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 1, i32 1, + i1 false) + ret void +} + +; Test a 1-byte move, i64 version. +define void @f4(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f4: +; CHECK: mvc 0(1,%r2), 0(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1, i32 1, + i1 false) + ret void +} + +; Test the upper range of a single MVC, i32 version. +define void @f5(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f5: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 256, i32 1, + i1 false) + ret void +} + +; Test the upper range of a single MVC, i64 version. +define void @f6(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f6: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 256, i32 1, + i1 false) + ret void +} + +; Test the first case that needs two MVCs. +define void @f7(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f7: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(1,%r2), 256(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 257, i32 1, + i1 false) + ret void +} + +; Test the last-but-one case that needs two MVCs. 
+define void @f8(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f8: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(255,%r2), 256(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 511, i32 1, + i1 false) + ret void +} + +; Test the last case that needs two MVCs. +define void @f9(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f9: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(256,%r2), 256(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 512, i32 1, + i1 false) + ret void +} + +; Test an arbitrary value that uses straight-line code. +define void @f10(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f10: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(256,%r2), 256(%r3) +; CHECK: mvc 512(256,%r2), 512(%r3) +; CHECK: mvc 768(256,%r2), 768(%r3) +; CHECK: mvc 1024(255,%r2), 1024(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1, + i1 false) + ret void +} + +; ...and again in cases where not all parts are in range of MVC. +define void @f11(i8 *%srcbase, i8 *%destbase) { +; CHECK-LABEL: f11: +; CHECK: mvc 4000(256,%r2), 3500(%r3) +; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2) +; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3) +; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3) +; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3) +; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]]) +; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]]) +; CHECK: br %r14 + %dest = getelementptr i8 *%srcbase, i64 4000 + %src = getelementptr i8* %destbase, i64 3500 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1, + i1 false) + ret void +} + +; ...and again with a destination frame base that goes out of range. 
+define void @f12() { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK: mvc 4076(256,%r15), 2100(%r15) +; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15) +; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15) +; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15) +; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15) +; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %arr = alloca [6000 x i8] + %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 3900 + %src = getelementptr [6000 x i8] *%arr, i64 0, i64 1924 + call void @foo(i8 *%dest, i8 *%src) + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1, + i1 false) + call void @foo(i8 *%dest, i8 *%src) + ret void +} + +; ...and again with a source frame base that goes out of range. +define void @f13() { +; CHECK-LABEL: f13: +; CHECK: brasl %r14, foo@PLT +; CHECK: mvc 200(256,%r15), 3826(%r15) +; CHECK: mvc 456(256,%r15), 4082(%r15) +; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15) +; CHECK: mvc 712(256,%r15), 0([[NEWSRC]]) +; CHECK: mvc 968(256,%r15), 256([[NEWSRC]]) +; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]]) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %arr = alloca [6000 x i8] + %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 24 + %src = getelementptr [6000 x i8] *%arr, i64 0, i64 3650 + call void @foo(i8 *%dest, i8 *%src) + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1, + i1 false) + call void @foo(i8 *%dest, i8 *%src) + ret void +} + +; Test the last case that is done using straight-line code. 
+define void @f14(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f14: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(256,%r2), 256(%r3) +; CHECK: mvc 512(256,%r2), 512(%r3) +; CHECK: mvc 768(256,%r2), 768(%r3) +; CHECK: mvc 1024(256,%r2), 1024(%r3) +; CHECK: mvc 1280(256,%r2), 1280(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1536, i32 1, + i1 false) + ret void +} + +; Test the first case that is done using a loop. +define void @f15(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f15: +; CHECK: lghi [[COUNT:%r[0-5]]], 6 +; CHECK: [[LABEL:\.L[^:]*]]: +; CHECK: pfd 2, 768(%r2) +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: la %r2, 256(%r2) +; CHECK: la %r3, 256(%r3) +; CHECK: brctg [[COUNT]], [[LABEL]] +; CHECK: mvc 0(1,%r2), 0(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1, + i1 false) + ret void +} + +; ...and again with frame bases, where the base must be loaded into a +; register before the loop. +define void @f16() { +; CHECK-LABEL: f16: +; CHECK: brasl %r14, foo@PLT +; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6 +; CHECK-DAG: la [[BASE:%r[0-5]]], 160(%r15) +; CHECK: [[LABEL:\.L[^:]*]]: +; CHECK: pfd 2, 2368([[BASE]]) +; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]]) +; CHECK: la [[BASE]], 256([[BASE]]) +; CHECK: brctg [[COUNT]], [[LABEL]] +; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]]) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %arr = alloca [3200 x i8] + %dest = getelementptr [3200 x i8] *%arr, i64 0, i64 1600 + %src = getelementptr [3200 x i8] *%arr, i64 0, i64 0 + call void @foo(i8 *%dest, i8 *%src) + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1, + i1 false) + call void @foo(i8 *%dest, i8 *%src) + ret void +} diff --git a/test/CodeGen/SystemZ/memcpy-02.ll b/test/CodeGen/SystemZ/memcpy-02.ll new file mode 100644 index 0000000000000..2b010911f88e4 --- /dev/null +++ b/test/CodeGen/SystemZ/memcpy-02.ll @@ -0,0 +1,392 @@ +; Test load/store pairs that act as memcpys. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g1src = global i8 1 +@g1dst = global i8 1 +@g2src = global i16 2 +@g2dst = global i16 2 +@g3 = global i32 3 +@g4 = global i64 4 +@g5src = external global fp128, align 16 +@g5dst = external global fp128, align 16 + +; Test the simple i8 case. +define void @f1(i8 *%ptr1) { +; CHECK-LABEL: f1: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + store i8 %val, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is zero-extended to 32 bits. +define void @f2(i8 *%ptr1) { +; CHECK-LABEL: f2: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %ext = zext i8 %val to i32 + %trunc = trunc i32 %ext to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is zero-extended to 64 bits. +define void @f3(i8 *%ptr1) { +; CHECK-LABEL: f3: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %ext = zext i8 %val to i64 + %trunc = trunc i64 %ext to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is sign-extended to 32 bits. +define void @f4(i8 *%ptr1) { +; CHECK-LABEL: f4: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %ext = sext i8 %val to i32 + %trunc = trunc i32 %ext to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is sign-extended to 64 bits. +define void @f5(i8 *%ptr1) { +; CHECK-LABEL: f5: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %ext = sext i8 %val to i64 + %trunc = trunc i64 %ext to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test the simple i16 case. 
+define void @f6(i16 *%ptr1) { +; CHECK-LABEL: f6: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + store i16 %val, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is zero-extended to 32 bits. +define void @f7(i16 *%ptr1) { +; CHECK-LABEL: f7: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %ext = zext i16 %val to i32 + %trunc = trunc i32 %ext to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is zero-extended to 64 bits. +define void @f8(i16 *%ptr1) { +; CHECK-LABEL: f8: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %ext = zext i16 %val to i64 + %trunc = trunc i64 %ext to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is sign-extended to 32 bits. +define void @f9(i16 *%ptr1) { +; CHECK-LABEL: f9: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %ext = sext i16 %val to i32 + %trunc = trunc i32 %ext to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is sign-extended to 64 bits. +define void @f10(i16 *%ptr1) { +; CHECK-LABEL: f10: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %ext = sext i16 %val to i64 + %trunc = trunc i64 %ext to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test the simple i32 case. +define void @f11(i32 *%ptr1) { +; CHECK-LABEL: f11: +; CHECK: mvc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + store i32 %val, i32 *%ptr2 + ret void +} + +; Test i32 cases where the value is zero-extended to 64 bits. 
+define void @f12(i32 *%ptr1) { +; CHECK-LABEL: f12: +; CHECK: mvc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %ext = zext i32 %val to i64 + %trunc = trunc i64 %ext to i32 + store i32 %trunc, i32 *%ptr2 + ret void +} + +; Test i32 cases where the value is sign-extended to 64 bits. +define void @f13(i32 *%ptr1) { +; CHECK-LABEL: f13: +; CHECK: mvc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %ext = sext i32 %val to i64 + %trunc = trunc i64 %ext to i32 + store i32 %trunc, i32 *%ptr2 + ret void +} + +; Test the i64 case. +define void @f14(i64 *%ptr1) { +; CHECK-LABEL: f14: +; CHECK: mvc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + store i64 %val, i64 *%ptr2 + ret void +} + +; Test the f32 case. +define void @f15(float *%ptr1) { +; CHECK-LABEL: f15: +; CHECK: mvc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr float *%ptr1, i64 1 + %val = load float *%ptr1 + store float %val, float *%ptr2 + ret void +} + +; Test the f64 case. +define void @f16(double *%ptr1) { +; CHECK-LABEL: f16: +; CHECK: mvc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr double *%ptr1, i64 1 + %val = load double *%ptr1 + store double %val, double *%ptr2 + ret void +} + +; Test the f128 case. +define void @f17(fp128 *%ptr1) { +; CHECK-LABEL: f17: +; CHECK: mvc 16(16,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr fp128 *%ptr1, i64 1 + %val = load fp128 *%ptr1 + store fp128 %val, fp128 *%ptr2 + ret void +} + +; Make sure that we don't use MVC if the load is volatile. +define void @f18(i64 *%ptr1) { +; CHECK-LABEL: f18: +; CHECK-NOT: mvc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load volatile i64 *%ptr1 + store i64 %val, i64 *%ptr2 + ret void +} + +; ...likewise the store. 
+define void @f19(i64 *%ptr1) { +; CHECK-LABEL: f19: +; CHECK-NOT: mvc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + store volatile i64 %val, i64 *%ptr2 + ret void +} + +; Test that MVC is not used for aligned loads and stores if there is +; no way of telling whether they alias. We don't want to use MVC in +; cases where the addresses could be equal. +define void @f20(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f20: +; CHECK-NOT: mvc +; CHECK: br %r14 + %val = load i64 *%ptr1 + store i64 %val, i64 *%ptr2 + ret void +} + +; ...and again for unaligned loads and stores. +define void @f21(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f21: +; CHECK-NOT: mvc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2 + store i64 %val, i64 *%ptr2, align 2 + ret void +} + +; Test a case where there is definite overlap. +define void @f22(i64 %base) { +; CHECK-LABEL: f22: +; CHECK-NOT: mvc +; CHECK: br %r14 + %add = add i64 %base, 1 + %ptr1 = inttoptr i64 %base to i64 * + %ptr2 = inttoptr i64 %add to i64 * + %val = load i64 *%ptr1, align 1 + store i64 %val, i64 *%ptr2, align 1 + ret void +} + +; Test that we can use MVC for global addresses for i8. +define void @f23(i8 *%ptr) { +; CHECK-LABEL: f23: +; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src +; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst +; CHECK: mvc 0(1,[[DST]]), 0([[SRC]]) +; CHECK: br %r14 + %val = load i8 *@g1src + store i8 %val, i8 *@g1dst + ret void +} + +; Test that we use LHRL and STHRL for i16. +define void @f24(i16 *%ptr) { +; CHECK-LABEL: f24: +; CHECK: lhrl [[REG:%r[0-5]]], g2src +; CHECK: sthrl [[REG]], g2dst +; CHECK: br %r14 + %val = load i16 *@g2src + store i16 %val, i16 *@g2dst + ret void +} + +; Test that we use LRL for i32. +define void @f25(i32 *%ptr) { +; CHECK-LABEL: f25: +; CHECK: lrl [[REG:%r[0-5]]], g3 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %val = load i32 *@g3 + store i32 %val, i32 *%ptr + ret void +} + +; ...likewise STRL. 
+define void @f26(i32 *%ptr) { +; CHECK-LABEL: f26: +; CHECK: l [[REG:%r[0-5]]], 0(%r2) +; CHECK: strl [[REG]], g3 +; CHECK: br %r14 + %val = load i32 *%ptr + store i32 %val, i32 *@g3 + ret void +} + +; Test that we use LGRL for i64. +define void @f27(i64 *%ptr) { +; CHECK-LABEL: f27: +; CHECK: lgrl [[REG:%r[0-5]]], g4 +; CHECK: stg [[REG]], 0(%r2) +; CHECK: br %r14 + %val = load i64 *@g4 + store i64 %val, i64 *%ptr + ret void +} + +; ...likewise STGRL. +define void @f28(i64 *%ptr) { +; CHECK-LABEL: f28: +; CHECK: lg [[REG:%r[0-5]]], 0(%r2) +; CHECK: stgrl [[REG]], g4 +; CHECK: br %r14 + %val = load i64 *%ptr + store i64 %val, i64 *@g4 + ret void +} + +; Test that we can use MVC for global addresses for fp128. +define void @f29(fp128 *%ptr) { +; CHECK-LABEL: f29: +; CHECK-DAG: larl [[SRC:%r[0-5]]], g5src +; CHECK-DAG: larl [[DST:%r[0-5]]], g5dst +; CHECK: mvc 0(16,[[DST]]), 0([[SRC]]) +; CHECK: br %r14 + %val = load fp128 *@g5src, align 16 + store fp128 %val, fp128 *@g5dst, align 16 + ret void +} + +; Test a case where offset disambiguation is enough. +define void @f30(i64 *%ptr1) { +; CHECK-LABEL: f30: +; CHECK: mvc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1, align 1 + store i64 %val, i64 *%ptr2, align 1 + ret void +} + +; Test f21 in cases where TBAA tells us there is no alias. +define void @f31(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f31: +; CHECK: mvc 0(8,%r3), 0(%r2) +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2, !tbaa !1 + store i64 %val, i64 *%ptr2, align 2, !tbaa !2 + ret void +} + +; Test f21 in cases where TBAA is present but doesn't help. 
+define void @f32(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f32: +; CHECK-NOT: mvc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2, !tbaa !1 + store i64 %val, i64 *%ptr2, align 2, !tbaa !1 + ret void +} + +!0 = metadata !{ metadata !"root" } +!1 = metadata !{ metadata !3, metadata !3, i64 0 } +!2 = metadata !{ metadata !4, metadata !4, i64 0 } +!3 = metadata !{ metadata !"set1", metadata !0 } +!4 = metadata !{ metadata !"set2", metadata !0 } diff --git a/test/CodeGen/SystemZ/memset-01.ll b/test/CodeGen/SystemZ/memset-01.ll new file mode 100644 index 0000000000000..f17901cc73ab4 --- /dev/null +++ b/test/CodeGen/SystemZ/memset-01.ll @@ -0,0 +1,160 @@ +; Test memset in cases where the set value is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind + +; No bytes, i32 version. +define void @f1(i8 *%dest, i8 %val) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK-NOT: %r3 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 0, i32 1, i1 false) + ret void +} + +; No bytes, i64 version. +define void @f2(i8 *%dest, i8 %val) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK-NOT: %r3 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 0, i32 1, i1 false) + ret void +} + +; 1 byte, i32 version. +define void @f3(i8 *%dest, i8 %val) { +; CHECK-LABEL: f3: +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 1, i32 1, i1 false) + ret void +} + +; 1 byte, i64 version. +define void @f4(i8 *%dest, i8 %val) { +; CHECK-LABEL: f4: +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1, i32 1, i1 false) + ret void +} + +; 2 bytes, i32 version. 
+define void @f5(i8 *%dest, i8 %val) { +; CHECK-LABEL: f5: +; CHECK-DAG: stc %r3, 0(%r2) +; CHECK-DAG: stc %r3, 1(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 2, i32 1, i1 false) + ret void +} + +; 2 bytes, i64 version. +define void @f6(i8 *%dest, i8 %val) { +; CHECK-LABEL: f6: +; CHECK-DAG: stc %r3, 0(%r2) +; CHECK-DAG: stc %r3, 1(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 2, i32 1, i1 false) + ret void +} + +; 3 bytes, i32 version. +define void @f7(i8 *%dest, i8 %val) { +; CHECK-LABEL: f7: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(2,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 3, i32 1, i1 false) + ret void +} + +; 3 bytes, i64 version. +define void @f8(i8 *%dest, i8 %val) { +; CHECK-LABEL: f8: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(2,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 3, i32 1, i1 false) + ret void +} + +; 257 bytes, i32 version. +define void @f9(i8 *%dest, i8 %val) { +; CHECK-LABEL: f9: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 257, i32 1, i1 false) + ret void +} + +; 257 bytes, i64 version. +define void @f10(i8 *%dest, i8 %val) { +; CHECK-LABEL: f10: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 257, i32 1, i1 false) + ret void +} + +; 258 bytes, i32 version. We need two MVCs. +define void @f11(i8 *%dest, i8 %val) { +; CHECK-LABEL: f11: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 258, i32 1, i1 false) + ret void +} + +; 258 bytes, i64 version. 
+define void @f12(i8 *%dest, i8 %val) { +; CHECK-LABEL: f12: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 258, i32 1, i1 false) + ret void +} + +; Test the largest case for which straight-line code is used. +define void @f13(i8 *%dest, i8 %val) { +; CHECK-LABEL: f13: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(256,%r2), 256(%r2) +; CHECK: mvc 513(256,%r2), 512(%r2) +; CHECK: mvc 769(256,%r2), 768(%r2) +; CHECK: mvc 1025(256,%r2), 1024(%r2) +; CHECK: mvc 1281(256,%r2), 1280(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1537, i32 1, + i1 false) + ret void +} + +; Test the next size up, which uses a loop. We leave the other corner +; cases to memcpy-01.ll. +define void @f14(i8 *%dest, i8 %val) { +; CHECK-LABEL: f14: +; CHECK: stc %r3, 0(%r2) +; CHECK: lghi [[COUNT:%r[0-5]]], 6 +; CHECK: [[LABEL:\.L[^:]*]]: +; CHECK: pfd 2, 769(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: la %r2, 256(%r2) +; CHECK: brctg [[COUNT]], [[LABEL]] +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1538, i32 1, + i1 false) + ret void +} diff --git a/test/CodeGen/SystemZ/memset-02.ll b/test/CodeGen/SystemZ/memset-02.ll new file mode 100644 index 0000000000000..b4724c0b5745b --- /dev/null +++ b/test/CodeGen/SystemZ/memset-02.ll @@ -0,0 +1,162 @@ +; Test memset in cases where the set value is a constant other than 0 and -1. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind + +; No bytes, i32 version. 
+define void @f1(i8 *%dest) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 0, i32 1, i1 false) + ret void +} + +; No bytes, i64 version. +define void @f2(i8 *%dest) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 0, i32 1, i1 false) + ret void +} + +; 1 byte, i32 version. +define void @f3(i8 *%dest) { +; CHECK-LABEL: f3: +; CHECK: mvi 0(%r2), 128 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 1, i32 1, i1 false) + ret void +} + +; 1 byte, i64 version. +define void @f4(i8 *%dest) { +; CHECK-LABEL: f4: +; CHECK: mvi 0(%r2), 128 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 1, i32 1, i1 false) + ret void +} + +; 2 bytes, i32 version. +define void @f5(i8 *%dest) { +; CHECK-LABEL: f5: +; CHECK: mvhhi 0(%r2), -32640 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 2, i32 1, i1 false) + ret void +} + +; 2 bytes, i64 version. +define void @f6(i8 *%dest) { +; CHECK-LABEL: f6: +; CHECK: mvhhi 0(%r2), -32640 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 2, i32 1, i1 false) + ret void +} + +; 3 bytes, i32 version. +define void @f7(i8 *%dest) { +; CHECK-LABEL: f7: +; CHECK-DAG: mvhhi 0(%r2), -32640 +; CHECK-DAG: mvi 2(%r2), 128 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 3, i32 1, i1 false) + ret void +} + +; 3 bytes, i64 version. +define void @f8(i8 *%dest) { +; CHECK-LABEL: f8: +; CHECK-DAG: mvhhi 0(%r2), -32640 +; CHECK-DAG: mvi 2(%r2), 128 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 3, i32 1, i1 false) + ret void +} + +; 4 bytes, i32 version. 
+define void @f9(i8 *%dest) { +; CHECK-LABEL: f9: +; CHECK: iilf [[REG:%r[0-5]]], 2155905152 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 4, i32 1, i1 false) + ret void +} + +; 4 bytes, i64 version. +define void @f10(i8 *%dest) { +; CHECK-LABEL: f10: +; CHECK: iilf [[REG:%r[0-5]]], 2155905152 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 4, i32 1, i1 false) + ret void +} + +; 5 bytes, i32 version. +define void @f11(i8 *%dest) { +; CHECK-LABEL: f11: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(4,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 5, i32 1, i1 false) + ret void +} + +; 5 bytes, i64 version. +define void @f12(i8 *%dest) { +; CHECK-LABEL: f12: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(4,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 5, i32 1, i1 false) + ret void +} + +; 257 bytes, i32 version. +define void @f13(i8 *%dest) { +; CHECK-LABEL: f13: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 257, i32 1, i1 false) + ret void +} + +; 257 bytes, i64 version. +define void @f14(i8 *%dest) { +; CHECK-LABEL: f14: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 257, i32 1, i1 false) + ret void +} + +; 258 bytes, i32 version. We need two MVCs. +define void @f15(i8 *%dest) { +; CHECK-LABEL: f15: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 258, i32 1, i1 false) + ret void +} + +; 258 bytes, i64 version. 
+define void @f16(i8 *%dest) { +; CHECK-LABEL: f16: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 258, i32 1, i1 false) + ret void +} diff --git a/test/CodeGen/SystemZ/memset-03.ll b/test/CodeGen/SystemZ/memset-03.ll new file mode 100644 index 0000000000000..a95f89fc7c0ae --- /dev/null +++ b/test/CodeGen/SystemZ/memset-03.ll @@ -0,0 +1,382 @@ +; Test memsets that clear all bits. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind + +; No bytes, i32 version. +define void @f1(i8 *%dest) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 0, i32 1, i1 false) + ret void +} + +; No bytes, i64 version. +define void @f2(i8 *%dest) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 0, i32 1, i1 false) + ret void +} + +; 1 byte, i32 version. +define void @f3(i8 *%dest) { +; CHECK-LABEL: f3: +; CHECK: mvi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 1, i32 1, i1 false) + ret void +} + +; 1 byte, i64 version. +define void @f4(i8 *%dest) { +; CHECK-LABEL: f4: +; CHECK: mvi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 1, i32 1, i1 false) + ret void +} + +; 2 bytes, i32 version. +define void @f5(i8 *%dest) { +; CHECK-LABEL: f5: +; CHECK: mvhhi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 2, i32 1, i1 false) + ret void +} + +; 2 bytes, i64 version. +define void @f6(i8 *%dest) { +; CHECK-LABEL: f6: +; CHECK: mvhhi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 2, i32 1, i1 false) + ret void +} + +; 3 bytes, i32 version. 
+define void @f7(i8 *%dest) { +; CHECK-LABEL: f7: +; CHECK-DAG: mvhhi 0(%r2), 0 +; CHECK-DAG: mvi 2(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 3, i32 1, i1 false) + ret void +} + +; 3 bytes, i64 version. +define void @f8(i8 *%dest) { +; CHECK-LABEL: f8: +; CHECK-DAG: mvhhi 0(%r2), 0 +; CHECK-DAG: mvi 2(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 3, i32 1, i1 false) + ret void +} + +; 4 bytes, i32 version. +define void @f9(i8 *%dest) { +; CHECK-LABEL: f9: +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 4, i32 1, i1 false) + ret void +} + +; 4 bytes, i64 version. +define void @f10(i8 *%dest) { +; CHECK-LABEL: f10: +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 4, i32 1, i1 false) + ret void +} + +; 5 bytes, i32 version. +define void @f11(i8 *%dest) { +; CHECK-LABEL: f11: +; CHECK-DAG: mvhi 0(%r2), 0 +; CHECK-DAG: mvi 4(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 5, i32 1, i1 false) + ret void +} + +; 5 bytes, i64 version. +define void @f12(i8 *%dest) { +; CHECK-LABEL: f12: +; CHECK-DAG: mvhi 0(%r2), 0 +; CHECK-DAG: mvi 4(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 5, i32 1, i1 false) + ret void +} + +; 6 bytes, i32 version. +define void @f13(i8 *%dest) { +; CHECK-LABEL: f13: +; CHECK-DAG: mvhi 0(%r2), 0 +; CHECK-DAG: mvhhi 4(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 6, i32 1, i1 false) + ret void +} + +; 6 bytes, i64 version. +define void @f14(i8 *%dest) { +; CHECK-LABEL: f14: +; CHECK-DAG: mvhi 0(%r2), 0 +; CHECK-DAG: mvhhi 4(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 6, i32 1, i1 false) + ret void +} + +; 7 bytes, i32 version. 
+define void @f15(i8 *%dest) { +; CHECK-LABEL: f15: +; CHECK: xc 0(7,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 7, i32 1, i1 false) + ret void +} + +; 7 bytes, i64 version. +define void @f16(i8 *%dest) { +; CHECK-LABEL: f16: +; CHECK: xc 0(7,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 7, i32 1, i1 false) + ret void +} + +; 8 bytes, i32 version. +define void @f17(i8 *%dest) { +; CHECK-LABEL: f17: +; CHECK: mvghi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 8, i32 1, i1 false) + ret void +} + +; 8 bytes, i64 version. +define void @f18(i8 *%dest) { +; CHECK-LABEL: f18: +; CHECK: mvghi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 8, i32 1, i1 false) + ret void +} + +; 9 bytes, i32 version. +define void @f19(i8 *%dest) { +; CHECK-LABEL: f19: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 9, i32 1, i1 false) + ret void +} + +; 9 bytes, i64 version. +define void @f20(i8 *%dest) { +; CHECK-LABEL: f20: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 9, i32 1, i1 false) + ret void +} + +; 10 bytes, i32 version. +define void @f21(i8 *%dest) { +; CHECK-LABEL: f21: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvhhi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 10, i32 1, i1 false) + ret void +} + +; 10 bytes, i64 version. +define void @f22(i8 *%dest) { +; CHECK-LABEL: f22: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvhhi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 10, i32 1, i1 false) + ret void +} + +; 11 bytes, i32 version. 
+define void @f23(i8 *%dest) { +; CHECK-LABEL: f23: +; CHECK: xc 0(11,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 11, i32 1, i1 false) + ret void +} + +; 11 bytes, i64 version. +define void @f24(i8 *%dest) { +; CHECK-LABEL: f24: +; CHECK: xc 0(11,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 11, i32 1, i1 false) + ret void +} + +; 12 bytes, i32 version. +define void @f25(i8 *%dest) { +; CHECK-LABEL: f25: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvhi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 12, i32 1, i1 false) + ret void +} + +; 12 bytes, i64 version. +define void @f26(i8 *%dest) { +; CHECK-LABEL: f26: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvhi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 12, i32 1, i1 false) + ret void +} + +; 13 bytes, i32 version. +define void @f27(i8 *%dest) { +; CHECK-LABEL: f27: +; CHECK: xc 0(13,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 13, i32 1, i1 false) + ret void +} + +; 13 bytes, i64 version. +define void @f28(i8 *%dest) { +; CHECK-LABEL: f28: +; CHECK: xc 0(13,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 13, i32 1, i1 false) + ret void +} + +; 14 bytes, i32 version. +define void @f29(i8 *%dest) { +; CHECK-LABEL: f29: +; CHECK: xc 0(14,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 14, i32 1, i1 false) + ret void +} + +; 14 bytes, i64 version. +define void @f30(i8 *%dest) { +; CHECK-LABEL: f30: +; CHECK: xc 0(14,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 14, i32 1, i1 false) + ret void +} + +; 15 bytes, i32 version. 
+define void @f31(i8 *%dest) {
+; CHECK-LABEL: f31:
+; CHECK: xc 0(15,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 15, i32 1, i1 false)
+  ret void
+}
+
+; 15 bytes, i64 version.
+define void @f32(i8 *%dest) {
+; CHECK-LABEL: f32:
+; CHECK: xc 0(15,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 15, i32 1, i1 false)
+  ret void
+}
+
+; 16 bytes, i32 version.
+define void @f33(i8 *%dest) {
+; CHECK-LABEL: f33:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvghi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 16, i32 1, i1 false)
+  ret void
+}
+
+; 16 bytes, i64 version.
+define void @f34(i8 *%dest) {
+; CHECK-LABEL: f34:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvghi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 16, i32 1, i1 false)
+  ret void
+}
+
+; 17 bytes, i32 version.
+define void @f35(i8 *%dest) {
+; CHECK-LABEL: f35:
+; CHECK: xc 0(17,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 17, i32 1, i1 false)
+  ret void
+}
+
+; 17 bytes, i64 version.
+define void @f36(i8 *%dest) {
+; CHECK-LABEL: f36:
+; CHECK: xc 0(17,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 17, i32 1, i1 false)
+  ret void
+}
+
+; 256 bytes, i32 version.
+define void @f37(i8 *%dest) {
+; CHECK-LABEL: f37:
+; CHECK: xc 0(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 256, i32 1, i1 false)
+  ret void
+}
+
+; 256 bytes, i64 version.
+define void @f38(i8 *%dest) {
+; CHECK-LABEL: f38:
+; CHECK: xc 0(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 256, i32 1, i1 false)
+  ret void
+}
+
+; 257 bytes, i32 version. We need two XCs.
+define void @f39(i8 *%dest) { +; CHECK-LABEL: f39: +; CHECK: xc 0(256,%r2), 0(%r2) +; CHECK: xc 256(1,%r2), 256(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 257, i32 1, i1 false) + ret void +} + +; 257 bytes, i64 version. +define void @f40(i8 *%dest) { +; CHECK-LABEL: f40: +; CHECK: xc 0(256,%r2), 0(%r2) +; CHECK: xc 256(1,%r2), 256(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 257, i32 1, i1 false) + ret void +} diff --git a/test/CodeGen/SystemZ/memset-04.ll b/test/CodeGen/SystemZ/memset-04.ll new file mode 100644 index 0000000000000..7906e8d10a1f1 --- /dev/null +++ b/test/CodeGen/SystemZ/memset-04.ll @@ -0,0 +1,398 @@ +; Test memsets that set all bits. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind + +; No bytes, i32 version. +define void @f1(i8 *%dest) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 0, i32 1, i1 false) + ret void +} + +; No bytes, i64 version. +define void @f2(i8 *%dest) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 0, i32 1, i1 false) + ret void +} + +; 1 byte, i32 version. +define void @f3(i8 *%dest) { +; CHECK-LABEL: f3: +; CHECK: mvi 0(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 1, i32 1, i1 false) + ret void +} + +; 1 byte, i64 version. +define void @f4(i8 *%dest) { +; CHECK-LABEL: f4: +; CHECK: mvi 0(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 1, i32 1, i1 false) + ret void +} + +; 2 bytes, i32 version. 
+define void @f5(i8 *%dest) { +; CHECK-LABEL: f5: +; CHECK: mvhhi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 2, i32 1, i1 false) + ret void +} + +; 2 bytes, i64 version. +define void @f6(i8 *%dest) { +; CHECK-LABEL: f6: +; CHECK: mvhhi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 2, i32 1, i1 false) + ret void +} + +; 3 bytes, i32 version. +define void @f7(i8 *%dest) { +; CHECK-LABEL: f7: +; CHECK-DAG: mvhhi 0(%r2), -1 +; CHECK-DAG: mvi 2(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 3, i32 1, i1 false) + ret void +} + +; 3 bytes, i64 version. +define void @f8(i8 *%dest) { +; CHECK-LABEL: f8: +; CHECK-DAG: mvhhi 0(%r2), -1 +; CHECK-DAG: mvi 2(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 3, i32 1, i1 false) + ret void +} + +; 4 bytes, i32 version. +define void @f9(i8 *%dest) { +; CHECK-LABEL: f9: +; CHECK: mvhi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 4, i32 1, i1 false) + ret void +} + +; 4 bytes, i64 version. +define void @f10(i8 *%dest) { +; CHECK-LABEL: f10: +; CHECK: mvhi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 4, i32 1, i1 false) + ret void +} + +; 5 bytes, i32 version. +define void @f11(i8 *%dest) { +; CHECK-LABEL: f11: +; CHECK-DAG: mvhi 0(%r2), -1 +; CHECK-DAG: mvi 4(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 5, i32 1, i1 false) + ret void +} + +; 5 bytes, i64 version. +define void @f12(i8 *%dest) { +; CHECK-LABEL: f12: +; CHECK-DAG: mvhi 0(%r2), -1 +; CHECK-DAG: mvi 4(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 5, i32 1, i1 false) + ret void +} + +; 6 bytes, i32 version. 
+define void @f13(i8 *%dest) { +; CHECK-LABEL: f13: +; CHECK-DAG: mvhi 0(%r2), -1 +; CHECK-DAG: mvhhi 4(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 6, i32 1, i1 false) + ret void +} + +; 6 bytes, i64 version. +define void @f14(i8 *%dest) { +; CHECK-LABEL: f14: +; CHECK-DAG: mvhi 0(%r2), -1 +; CHECK-DAG: mvhhi 4(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 6, i32 1, i1 false) + ret void +} + +; 7 bytes, i32 version. +define void @f15(i8 *%dest) { +; CHECK-LABEL: f15: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(6,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 7, i32 1, i1 false) + ret void +} + +; 7 bytes, i64 version. +define void @f16(i8 *%dest) { +; CHECK-LABEL: f16: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(6,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 7, i32 1, i1 false) + ret void +} + +; 8 bytes, i32 version. +define void @f17(i8 *%dest) { +; CHECK-LABEL: f17: +; CHECK: mvghi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 8, i32 1, i1 false) + ret void +} + +; 8 bytes, i64 version. +define void @f18(i8 *%dest) { +; CHECK-LABEL: f18: +; CHECK: mvghi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 8, i32 1, i1 false) + ret void +} + +; 9 bytes, i32 version. +define void @f19(i8 *%dest) { +; CHECK-LABEL: f19: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvi 8(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 9, i32 1, i1 false) + ret void +} + +; 9 bytes, i64 version. +define void @f20(i8 *%dest) { +; CHECK-LABEL: f20: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvi 8(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 9, i32 1, i1 false) + ret void +} + +; 10 bytes, i32 version. 
+define void @f21(i8 *%dest) { +; CHECK-LABEL: f21: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvhhi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 10, i32 1, i1 false) + ret void +} + +; 10 bytes, i64 version. +define void @f22(i8 *%dest) { +; CHECK-LABEL: f22: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvhhi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 10, i32 1, i1 false) + ret void +} + +; 11 bytes, i32 version. +define void @f23(i8 *%dest) { +; CHECK-LABEL: f23: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(10,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 11, i32 1, i1 false) + ret void +} + +; 11 bytes, i64 version. +define void @f24(i8 *%dest) { +; CHECK-LABEL: f24: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(10,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 11, i32 1, i1 false) + ret void +} + +; 12 bytes, i32 version. +define void @f25(i8 *%dest) { +; CHECK-LABEL: f25: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvhi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 12, i32 1, i1 false) + ret void +} + +; 12 bytes, i64 version. +define void @f26(i8 *%dest) { +; CHECK-LABEL: f26: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvhi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 12, i32 1, i1 false) + ret void +} + +; 13 bytes, i32 version. +define void @f27(i8 *%dest) { +; CHECK-LABEL: f27: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(12,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 13, i32 1, i1 false) + ret void +} + +; 13 bytes, i64 version. +define void @f28(i8 *%dest) { +; CHECK-LABEL: f28: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(12,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 13, i32 1, i1 false) + ret void +} + +; 14 bytes, i32 version. 
+define void @f29(i8 *%dest) { +; CHECK-LABEL: f29: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(13,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 14, i32 1, i1 false) + ret void +} + +; 14 bytes, i64 version. +define void @f30(i8 *%dest) { +; CHECK-LABEL: f30: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(13,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 14, i32 1, i1 false) + ret void +} + +; 15 bytes, i32 version. +define void @f31(i8 *%dest) { +; CHECK-LABEL: f31: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(14,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 15, i32 1, i1 false) + ret void +} + +; 15 bytes, i64 version. +define void @f32(i8 *%dest) { +; CHECK-LABEL: f32: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(14,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 15, i32 1, i1 false) + ret void +} + +; 16 bytes, i32 version. +define void @f33(i8 *%dest) { +; CHECK-LABEL: f33: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvghi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 16, i32 1, i1 false) + ret void +} + +; 16 bytes, i64 version. +define void @f34(i8 *%dest) { +; CHECK-LABEL: f34: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvghi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 16, i32 1, i1 false) + ret void +} + +; 17 bytes, i32 version. +define void @f35(i8 *%dest) { +; CHECK-LABEL: f35: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(16,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 17, i32 1, i1 false) + ret void +} + +; 17 bytes, i64 version. +define void @f36(i8 *%dest) { +; CHECK-LABEL: f36: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(16,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 17, i32 1, i1 false) + ret void +} + +; 257 bytes, i32 version. 
+define void @f37(i8 *%dest) { +; CHECK-LABEL: f37: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 257, i32 1, i1 false) + ret void +} + +; 257 bytes, i64 version. +define void @f38(i8 *%dest) { +; CHECK-LABEL: f38: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 257, i32 1, i1 false) + ret void +} + +; 258 bytes, i32 version. We need two MVCs. +define void @f39(i8 *%dest) { +; CHECK-LABEL: f39: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 258, i32 1, i1 false) + ret void +} + +; 258 bytes, i64 version. +define void @f40(i8 *%dest) { +; CHECK-LABEL: f40: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 258, i32 1, i1 false) + ret void +} diff --git a/test/CodeGen/SystemZ/or-01.ll b/test/CodeGen/SystemZ/or-01.ll index 20c93129efca5..23946d320678b 100644 --- a/test/CodeGen/SystemZ/or-01.ll +++ b/test/CodeGen/SystemZ/or-01.ll @@ -1,10 +1,13 @@ ; Test 32-bit ORs in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() ; Check OR. define i32 @f1(i32 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: or %r2, %r3 ; CHECK: br %r14 %or = or i32 %a, %b @@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) { ; Check the low end of the O range. define i32 @f2(i32 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: o %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) { ; Check the high end of the aligned O range. 
define i32 @f3(i32 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: o %r2, 4092(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1023 @@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) { ; Check the next word up, which should use OY instead of O. define i32 @f4(i32 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: oy %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1024 @@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) { ; Check the high end of the aligned OY range. define i32 @f5(i32 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: oy %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f6(i32 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: o %r2, 0(%r3) ; CHECK: br %r14 @@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) { ; Check the high end of the negative aligned OY range. define i32 @f7(i32 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: oy %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) { ; Check the low end of the OY range. define i32 @f8(i32 %a, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: oy %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f9(i32 %a, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: o %r2, 0(%r3) ; CHECK: br %r14 @@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) { ; Check that O allows an index. 
define i32 @f10(i32 %a, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: o %r2, 4092({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) { ; Check that OY allows an index. define i32 @f11(i32 %a, i64 %src, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: oy %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) { %or = or i32 %a, %b ret i32 %or } + +; Check that ORs of spilled values can use O rather than OR. +define i32 @f12(i32 *%ptr0) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK: o %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %ret = call i32 @foo() + + %or0 = or i32 %ret, %val0 + %or1 = or i32 %or0, %val1 + %or2 = or i32 %or1, %val2 + %or3 = or i32 %or2, %val3 + %or4 = or i32 %or3, %val4 + %or5 = or i32 %or4, %val5 + %or6 = or i32 %or5, %val6 + %or7 = or i32 %or6, %val7 + %or8 = or i32 %or7, %val8 + %or9 = or i32 %or8, %val9 + + ret i32 %or9 +} diff --git a/test/CodeGen/SystemZ/or-02.ll b/test/CodeGen/SystemZ/or-02.ll index 377a3e604c60c..267be2089e491 100644 --- a/test/CodeGen/SystemZ/or-02.ll +++ b/test/CodeGen/SystemZ/or-02.ll @@ -4,7 +4,7 @@ ; Check the lowest useful OILL value. 
define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: oill %r2, 1 ; CHECK: br %r14 %or = or i32 %a, 1 @@ -13,7 +13,7 @@ define i32 @f1(i32 %a) { ; Check the high end of the OILL range. define i32 @f2(i32 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: oill %r2, 65535 ; CHECK: br %r14 %or = or i32 %a, 65535 @@ -22,7 +22,7 @@ define i32 @f2(i32 %a) { ; Check the lowest useful OILH range, which is the next value up. define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: oilh %r2, 1 ; CHECK: br %r14 %or = or i32 %a, 65536 @@ -31,7 +31,7 @@ define i32 @f3(i32 %a) { ; Check the lowest useful OILF value, which is the next value up again. define i32 @f4(i32 %a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: oilf %r2, 65537 ; CHECK: br %r14 %or = or i32 %a, 65537 @@ -40,7 +40,7 @@ define i32 @f4(i32 %a) { ; Check the high end of the OILH range. define i32 @f5(i32 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: oilh %r2, 65535 ; CHECK: br %r14 %or = or i32 %a, -65536 @@ -49,7 +49,7 @@ define i32 @f5(i32 %a) { ; Check the next value up, which must use OILF instead. define i32 @f6(i32 %a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: oilf %r2, 4294901761 ; CHECK: br %r14 %or = or i32 %a, -65535 @@ -58,7 +58,7 @@ define i32 @f6(i32 %a) { ; Check the highest useful OILF value. define i32 @f7(i32 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: oilf %r2, 4294967294 ; CHECK: br %r14 %or = or i32 %a, -2 diff --git a/test/CodeGen/SystemZ/or-03.ll b/test/CodeGen/SystemZ/or-03.ll index 16f84f1635a8c..5fdbdfd1ed1f2 100644 --- a/test/CodeGen/SystemZ/or-03.ll +++ b/test/CodeGen/SystemZ/or-03.ll @@ -1,10 +1,13 @@ ; Test 64-bit ORs in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo() ; Check OGR. 
define i64 @f1(i64 %a, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: ogr %r2, %r3 ; CHECK: br %r14 %or = or i64 %a, %b @@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) { ; Check OG with no displacement. define i64 @f2(i64 %a, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: og %r2, 0(%r3) ; CHECK: br %r14 %b = load i64 *%src @@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) { ; Check the high end of the aligned OG range. define i64 @f3(i64 %a, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: og %r2, 524280(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: og %r2, 0(%r3) ; CHECK: br %r14 @@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) { ; Check the high end of the negative aligned OG range. define i64 @f5(i64 %a, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: og %r2, -8(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) { ; Check the low end of the OG range. define i64 @f6(i64 %a, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: og %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524296 ; CHECK: og %r2, 0(%r3) ; CHECK: br %r14 @@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) { ; Check that OG allows an index. 
define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: og %r2, 524280({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %or = or i64 %a, %b ret i64 %or } + +; Check that ORs of spilled values can use OG rather than OGR. +define i64 @f9(i64 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: og %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64 *%ptr0, i64 18 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + %val9 = load i64 *%ptr9 + + %ret = call i64 @foo() + + %or0 = or i64 %ret, %val0 + %or1 = or i64 %or0, %val1 + %or2 = or i64 %or1, %val2 + %or3 = or i64 %or2, %val3 + %or4 = or i64 %or3, %val4 + %or5 = or i64 %or4, %val5 + %or6 = or i64 %or5, %val6 + %or7 = or i64 %or6, %val7 + %or8 = or i64 %or7, %val8 + %or9 = or i64 %or8, %val9 + + ret i64 %or9 +} diff --git a/test/CodeGen/SystemZ/or-04.ll b/test/CodeGen/SystemZ/or-04.ll index a8278423981ac..87a30d564549f 100644 --- a/test/CodeGen/SystemZ/or-04.ll +++ b/test/CodeGen/SystemZ/or-04.ll @@ -4,7 +4,7 @@ ; Check the lowest useful OILL value. define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: oill %r2, 1 ; CHECK: br %r14 %or = or i64 %a, 1 @@ -13,7 +13,7 @@ define i64 @f1(i64 %a) { ; Check the high end of the OILL range. 
define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: oill %r2, 65535 ; CHECK: br %r14 %or = or i64 %a, 65535 @@ -22,7 +22,7 @@ define i64 @f2(i64 %a) { ; Check the lowest useful OILH value, which is the next value up. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: oilh %r2, 1 ; CHECK: br %r14 %or = or i64 %a, 65536 @@ -31,7 +31,7 @@ define i64 @f3(i64 %a) { ; Check the lowest useful OILF value, which is the next value up again. define i64 @f4(i64 %a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: oilf %r2, 4294901759 ; CHECK: br %r14 %or = or i64 %a, 4294901759 @@ -40,7 +40,7 @@ define i64 @f4(i64 %a) { ; Check the high end of the OILH range. define i64 @f5(i64 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: oilh %r2, 65535 ; CHECK: br %r14 %or = or i64 %a, 4294901760 @@ -49,7 +49,7 @@ define i64 @f5(i64 %a) { ; Check the high end of the OILF range. define i64 @f6(i64 %a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: oilf %r2, 4294967295 ; CHECK: br %r14 %or = or i64 %a, 4294967295 @@ -58,7 +58,7 @@ define i64 @f6(i64 %a) { ; Check the lowest useful OIHL value, which is the next value up. define i64 @f7(i64 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: oihl %r2, 1 ; CHECK: br %r14 %or = or i64 %a, 4294967296 @@ -67,7 +67,7 @@ define i64 @f7(i64 %a) { ; Check the next value up again, which must use two ORs. define i64 @f8(i64 %a) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: oihl %r2, 1 ; CHECK: oill %r2, 1 ; CHECK: br %r14 @@ -77,7 +77,7 @@ define i64 @f8(i64 %a) { ; Check the high end of the OILL range. 
define i64 @f9(i64 %a) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: oihl %r2, 1 ; CHECK: oill %r2, 65535 ; CHECK: br %r14 @@ -87,7 +87,7 @@ define i64 @f9(i64 %a) { ; Check the next value up, which must use OILH define i64 @f10(i64 %a) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: oihl %r2, 1 ; CHECK: oilh %r2, 1 ; CHECK: br %r14 @@ -97,7 +97,7 @@ define i64 @f10(i64 %a) { ; Check the next value up again, which must use OILF define i64 @f11(i64 %a) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: oihl %r2, 1 ; CHECK: oilf %r2, 65537 ; CHECK: br %r14 @@ -107,7 +107,7 @@ define i64 @f11(i64 %a) { ; Check the high end of the OIHL range. define i64 @f12(i64 %a) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: oihl %r2, 65535 ; CHECK: br %r14 %or = or i64 %a, 281470681743360 @@ -117,7 +117,7 @@ define i64 @f12(i64 %a) { ; Check a combination of the high end of the OIHL range and the high end ; of the OILF range. define i64 @f13(i64 %a) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: oihl %r2, 65535 ; CHECK: oilf %r2, 4294967295 ; CHECK: br %r14 @@ -127,7 +127,7 @@ define i64 @f13(i64 %a) { ; Check the lowest useful OIHH value. define i64 @f14(i64 %a) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: oihh %r2, 1 ; CHECK: br %r14 %or = or i64 %a, 281474976710656 @@ -136,7 +136,7 @@ define i64 @f14(i64 %a) { ; Check the next value up, which needs two ORs. define i64 @f15(i64 %a) { -; CHECK: f15: +; CHECK-LABEL: f15: ; CHECK: oihh %r2, 1 ; CHECK: oill %r2, 1 ; CHECK: br %r14 @@ -146,7 +146,7 @@ define i64 @f15(i64 %a) { ; Check the lowest useful OIHF value. define i64 @f16(i64 %a) { -; CHECK: f16: +; CHECK-LABEL: f16: ; CHECK: oihf %r2, 65537 ; CHECK: br %r14 %or = or i64 %a, 281479271677952 @@ -155,7 +155,7 @@ define i64 @f16(i64 %a) { ; Check the high end of the OIHH range. 
define i64 @f17(i64 %a) { -; CHECK: f17: +; CHECK-LABEL: f17: ; CHECK: oihh %r2, 65535 ; CHECK: br %r14 %or = or i64 %a, 18446462598732840960 @@ -164,7 +164,7 @@ define i64 @f17(i64 %a) { ; Check the high end of the OIHF range. define i64 @f18(i64 %a) { -; CHECK: f18: +; CHECK-LABEL: f18: ; CHECK: oihf %r2, 4294967295 ; CHECK: br %r14 %or = or i64 %a, -4294967296 @@ -173,7 +173,7 @@ define i64 @f18(i64 %a) { ; Check the highest useful OR value. define i64 @f19(i64 %a) { -; CHECK: f19: +; CHECK-LABEL: f19: ; CHECK: oihf %r2, 4294967295 ; CHECK: oilf %r2, 4294967294 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/or-05.ll b/test/CodeGen/SystemZ/or-05.ll index 9b6c10d4b5ce6..d90589128674b 100644 --- a/test/CodeGen/SystemZ/or-05.ll +++ b/test/CodeGen/SystemZ/or-05.ll @@ -4,7 +4,7 @@ ; Check the lowest useful constant, expressed as a signed integer. define void @f1(i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: oi 0(%r2), 1 ; CHECK: br %r14 %val = load i8 *%ptr @@ -15,7 +15,7 @@ define void @f1(i8 *%ptr) { ; Check the highest useful constant, expressed as a signed integer. define void @f2(i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: oi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -26,7 +26,7 @@ define void @f2(i8 *%ptr) { ; Check the lowest useful constant, expressed as an unsigned integer. define void @f3(i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: oi 0(%r2), 1 ; CHECK: br %r14 %val = load i8 *%ptr @@ -37,7 +37,7 @@ define void @f3(i8 *%ptr) { ; Check the highest useful constant, expressed as a unsigned integer. define void @f4(i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: oi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -48,7 +48,7 @@ define void @f4(i8 *%ptr) { ; Check the high end of the OI range. 
define void @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: oi 4095(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4095 @@ -60,7 +60,7 @@ define void @f5(i8 *%src) { ; Check the next byte up, which should use OIY instead of OI. define void @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: oiy 4096(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4096 @@ -72,7 +72,7 @@ define void @f6(i8 *%src) { ; Check the high end of the OIY range. define void @f7(i8 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: oiy 524287(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -85,7 +85,7 @@ define void @f7(i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f8(i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, 524288 ; CHECK: oi 0(%r2), 127 ; CHECK: br %r14 @@ -98,7 +98,7 @@ define void @f8(i8 *%src) { ; Check the high end of the negative OIY range. define void @f9(i8 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: oiy -1(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -110,7 +110,7 @@ define void @f9(i8 *%src) { ; Check the low end of the OIY range. define void @f10(i8 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: oiy -524288(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -123,7 +123,7 @@ define void @f10(i8 *%src) { ; Check the next byte down, which needs separate address logic. ; Other sequences besides this one would be OK. 
define void @f11(i8 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r2, -524289 ; CHECK: oi 0(%r2), 127 ; CHECK: br %r14 @@ -136,7 +136,7 @@ define void @f11(i8 *%src) { ; Check that OI does not allow an index define void @f12(i64 %src, i64 %index) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: agr %r2, %r3 ; CHECK: oi 4095(%r2), 127 ; CHECK: br %r14 @@ -151,7 +151,7 @@ define void @f12(i64 %src, i64 %index) { ; Check that OIY does not allow an index define void @f13(i64 %src, i64 %index) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: agr %r2, %r3 ; CHECK: oiy 4096(%r2), 127 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/or-06.ll b/test/CodeGen/SystemZ/or-06.ll index a24a18a191f1d..0a865d3509423 100644 --- a/test/CodeGen/SystemZ/or-06.ll +++ b/test/CodeGen/SystemZ/or-06.ll @@ -5,7 +5,7 @@ ; Zero extension to 32 bits, negative constant. define void @f1(i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: oi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -18,7 +18,7 @@ define void @f1(i8 *%ptr) { ; Zero extension to 64 bits, negative constant. define void @f2(i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: oi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -31,7 +31,7 @@ define void @f2(i8 *%ptr) { ; Zero extension to 32 bits, positive constant. define void @f3(i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: oi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -44,7 +44,7 @@ define void @f3(i8 *%ptr) { ; Zero extension to 64 bits, positive constant. define void @f4(i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: oi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -57,7 +57,7 @@ define void @f4(i8 *%ptr) { ; Sign extension to 32 bits, negative constant. define void @f5(i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: oi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -70,7 +70,7 @@ define void @f5(i8 *%ptr) { ; Sign extension to 64 bits, negative constant. 
 define void @f6(i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -83,7 +83,7 @@ define void @f6(i8 *%ptr) {
 
 ; Sign extension to 32 bits, positive constant.
 define void @f7(i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -96,7 +96,7 @@ define void @f7(i8 *%ptr) {
 
 ; Sign extension to 64 bits, positive constant.
 define void @f8(i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
diff --git a/test/CodeGen/SystemZ/or-07.ll b/test/CodeGen/SystemZ/or-07.ll
new file mode 100644
index 0000000000000..9fff88e71657b
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-07.ll
@@ -0,0 +1,39 @@
+; Test the three-operand forms of OR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check ORK.
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f1:
+; CHECK: ork %r2, %r3, %r4
+; CHECK: br %r14
+  %or = or i32 %b, %c
+  ret i32 %or
+}
+
+; Check that we can still use OR in obvious cases.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: or %r2, %r3
+; CHECK: br %r14
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check OGRK.
+define i64 @f3(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: f3:
+; CHECK: ogrk %r2, %r3, %r4
+; CHECK: br %r14
+  %or = or i64 %b, %c
+  ret i64 %or
+}
+
+; Check that we can still use OGR in obvious cases.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %or = or i64 %a, %b
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/or-08.ll b/test/CodeGen/SystemZ/or-08.ll
new file mode 100644
index 0000000000000..8f5bf3170bed5
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-08.ll
@@ -0,0 +1,57 @@
+; Test memory-to-memory ORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test the simple i8 case.
+define void @f1(i8 *%ptr1) { +; CHECK-LABEL: f1: +; CHECK: oc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %old = load i8 *%ptr2 + %or = or i8 %val, %old + store i8 %or, i8 *%ptr2 + ret void +} + +; Test the simple i16 case. +define void @f2(i16 *%ptr1) { +; CHECK-LABEL: f2: +; CHECK: oc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %old = load i16 *%ptr2 + %or = or i16 %val, %old + store i16 %or, i16 *%ptr2 + ret void +} + +; Test the simple i32 case. +define void @f3(i32 *%ptr1) { +; CHECK-LABEL: f3: +; CHECK: oc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %old = load i32 *%ptr2 + %or = or i32 %old, %val + store i32 %or, i32 *%ptr2 + ret void +} + +; Test the i64 case. +define void @f4(i64 *%ptr1) { +; CHECK-LABEL: f4: +; CHECK: oc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %or = or i64 %old, %val + store i64 %or, i64 *%ptr2 + ret void +} + +; Leave other more complicated tests to and-08.ll. diff --git a/test/CodeGen/SystemZ/prefetch-01.ll b/test/CodeGen/SystemZ/prefetch-01.ll new file mode 100644 index 0000000000000..bb7fea99ca7c8 --- /dev/null +++ b/test/CodeGen/SystemZ/prefetch-01.ll @@ -0,0 +1,87 @@ +; Test data prefetching. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.prefetch(i8*, i32, i32, i32) + +@g = global [4096 x i8] zeroinitializer + +; Check that instruction read prefetches are ignored. +define void @f1(i8 *%ptr) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 0) + ret void +} + +; Check that instruction write prefetches are ignored. 
+define void @f2(i8 *%ptr) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 0) + ret void +} + +; Check data read prefetches. +define void @f3(i8 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: pfd 1, 0(%r2) +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 1) + ret void +} + +; Check data write prefetches. +define void @f4(i8 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: pfd 2, 0(%r2) +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check an address at the negative end of the range. +define void @f5(i8 *%base, i64 %index) { +; CHECK-LABEL: f5: +; CHECK: pfd 2, -524288({{%r2,%r3|%r3,%r2}}) +; CHECK: br %r14 + %add = add i64 %index, -524288 + %ptr = getelementptr i8 *%base, i64 %add + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check an address at the positive end of the range. +define void @f6(i8 *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: pfd 2, 524287({{%r2,%r3|%r3,%r2}}) +; CHECK: br %r14 + %add = add i64 %index, 524287 + %ptr = getelementptr i8 *%base, i64 %add + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check that the next address up still compiles. +define void @f7(i8 *%base, i64 %index) { +; CHECK-LABEL: f7: +; CHECK: 524288 +; CHECK: pfd 2, +; CHECK: br %r14 + %add = add i64 %index, 524288 + %ptr = getelementptr i8 *%base, i64 %add + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check pc-relative prefetches. 
+define void @f8() { +; CHECK-LABEL: f8: +; CHECK: pfdrl 2, g +; CHECK: br %r14 + %ptr = getelementptr [4096 x i8] *@g, i64 0, i64 0 + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} diff --git a/test/CodeGen/SystemZ/risbg-01.ll b/test/CodeGen/SystemZ/risbg-01.ll new file mode 100644 index 0000000000000..a4d11fdae5b96 --- /dev/null +++ b/test/CodeGen/SystemZ/risbg-01.ll @@ -0,0 +1,472 @@ +; Test sequences that can use RISBG with a zeroed first operand. +; The tests here assume that RISBLG isn't available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; Test an extraction of bit 0 from a right-shifted value. +define i32 @f1(i32 %foo) { +; CHECK-LABEL: f1: +; CHECK: risbg %r2, %r2, 63, 191, 54 +; CHECK: br %r14 + %shr = lshr i32 %foo, 10 + %and = and i32 %shr, 1 + ret i32 %and +} + +; ...and again with i64. +define i64 @f2(i64 %foo) { +; CHECK-LABEL: f2: +; CHECK: risbg %r2, %r2, 63, 191, 54 +; CHECK: br %r14 + %shr = lshr i64 %foo, 10 + %and = and i64 %shr, 1 + ret i64 %and +} + +; Test an extraction of other bits from a right-shifted value. +define i32 @f3(i32 %foo) { +; CHECK-LABEL: f3: +; CHECK: risbg %r2, %r2, 60, 189, 42 +; CHECK: br %r14 + %shr = lshr i32 %foo, 22 + %and = and i32 %shr, 12 + ret i32 %and +} + +; ...and again with i64. +define i64 @f4(i64 %foo) { +; CHECK-LABEL: f4: +; CHECK: risbg %r2, %r2, 60, 189, 42 +; CHECK: br %r14 + %shr = lshr i64 %foo, 22 + %and = and i64 %shr, 12 + ret i64 %and +} + +; Test an extraction of most bits from a right-shifted value. +; The range should be reduced to exclude the zeroed high bits. +define i32 @f5(i32 %foo) { +; CHECK-LABEL: f5: +; CHECK: risbg %r2, %r2, 34, 188, 62 +; CHECK: br %r14 + %shr = lshr i32 %foo, 2 + %and = and i32 %shr, -8 + ret i32 %and +} + +; ...and again with i64. 
+define i64 @f6(i64 %foo) { +; CHECK-LABEL: f6: +; CHECK: risbg %r2, %r2, 2, 188, 62 +; CHECK: br %r14 + %shr = lshr i64 %foo, 2 + %and = and i64 %shr, -8 + ret i64 %and +} + +; Try the next value up (mask ....1111001). This needs a separate shift +; and mask. +define i32 @f7(i32 %foo) { +; CHECK-LABEL: f7: +; CHECK: srl %r2, 2 +; CHECK: nill %r2, 65529 +; CHECK: br %r14 + %shr = lshr i32 %foo, 2 + %and = and i32 %shr, -7 + ret i32 %and +} + +; ...and again with i64. +define i64 @f8(i64 %foo) { +; CHECK-LABEL: f8: +; CHECK: srlg %r2, %r2, 2 +; CHECK: nill %r2, 65529 +; CHECK: br %r14 + %shr = lshr i64 %foo, 2 + %and = and i64 %shr, -7 + ret i64 %and +} + +; Test an extraction of bits from a left-shifted value. The range should +; be reduced to exclude the zeroed low bits. +define i32 @f9(i32 %foo) { +; CHECK-LABEL: f9: +; CHECK: risbg %r2, %r2, 56, 189, 2 +; CHECK: br %r14 + %shr = shl i32 %foo, 2 + %and = and i32 %shr, 255 + ret i32 %and +} + +; ...and again with i64. +define i64 @f10(i64 %foo) { +; CHECK-LABEL: f10: +; CHECK: risbg %r2, %r2, 56, 189, 2 +; CHECK: br %r14 + %shr = shl i64 %foo, 2 + %and = and i64 %shr, 255 + ret i64 %and +} + +; Try a wrap-around mask (mask ....111100001111). This needs a separate shift +; and mask. +define i32 @f11(i32 %foo) { +; CHECK-LABEL: f11: +; CHECK: sll %r2, 2 +; CHECK: nill %r2, 65295 +; CHECK: br %r14 + %shr = shl i32 %foo, 2 + %and = and i32 %shr, -241 + ret i32 %and +} + +; ...and again with i64. +define i64 @f12(i64 %foo) { +; CHECK-LABEL: f12: +; CHECK: sllg %r2, %r2, 2 +; CHECK: nill %r2, 65295 +; CHECK: br %r14 + %shr = shl i64 %foo, 2 + %and = and i64 %shr, -241 + ret i64 %and +} + +; Test an extraction from a rotated value, no mask wraparound. +; This is equivalent to the lshr case, because the bits from the +; shl are not used. 
+define i32 @f13(i32 %foo) { +; CHECK-LABEL: f13: +; CHECK: risbg %r2, %r2, 56, 188, 46 +; CHECK: br %r14 + %parta = shl i32 %foo, 14 + %partb = lshr i32 %foo, 18 + %rotl = or i32 %parta, %partb + %and = and i32 %rotl, 248 + ret i32 %and +} + +; ...and again with i64. +define i64 @f14(i64 %foo) { +; CHECK-LABEL: f14: +; CHECK: risbg %r2, %r2, 56, 188, 14 +; CHECK: br %r14 + %parta = shl i64 %foo, 14 + %partb = lshr i64 %foo, 50 + %rotl = or i64 %parta, %partb + %and = and i64 %rotl, 248 + ret i64 %and +} + +; Try a case in which only the bits from the shl are used. +define i32 @f15(i32 %foo) { +; CHECK-LABEL: f15: +; CHECK: risbg %r2, %r2, 47, 177, 14 +; CHECK: br %r14 + %parta = shl i32 %foo, 14 + %partb = lshr i32 %foo, 18 + %rotl = or i32 %parta, %partb + %and = and i32 %rotl, 114688 + ret i32 %and +} + +; ...and again with i64. +define i64 @f16(i64 %foo) { +; CHECK-LABEL: f16: +; CHECK: risbg %r2, %r2, 47, 177, 14 +; CHECK: br %r14 + %parta = shl i64 %foo, 14 + %partb = lshr i64 %foo, 50 + %rotl = or i64 %parta, %partb + %and = and i64 %rotl, 114688 + ret i64 %and +} + +; Test a 32-bit rotate in which both parts of the OR are needed. +; This needs a separate shift and mask. +define i32 @f17(i32 %foo) { +; CHECK-LABEL: f17: +; CHECK: rll %r2, %r2, 4 +; CHECK: nilf %r2, 126 +; CHECK: br %r14 + %parta = shl i32 %foo, 4 + %partb = lshr i32 %foo, 28 + %rotl = or i32 %parta, %partb + %and = and i32 %rotl, 126 + ret i32 %and +} + +; ...and for i64, where RISBG should do the rotate too. +define i64 @f18(i64 %foo) { +; CHECK-LABEL: f18: +; CHECK: risbg %r2, %r2, 57, 190, 4 +; CHECK: br %r14 + %parta = shl i64 %foo, 4 + %partb = lshr i64 %foo, 60 + %rotl = or i64 %parta, %partb + %and = and i64 %rotl, 126 + ret i64 %and +} + +; Test an arithmetic shift right in which some of the sign bits are kept. +; This needs a separate shift and mask. 
+define i32 @f19(i32 %foo) { +; CHECK-LABEL: f19: +; CHECK: sra %r2, 28 +; CHECK: nilf %r2, 30 +; CHECK: br %r14 + %shr = ashr i32 %foo, 28 + %and = and i32 %shr, 30 + ret i32 %and +} + +; ...and again with i64. In this case RISBG is the best way of doing the AND. +define i64 @f20(i64 %foo) { +; CHECK-LABEL: f20: +; CHECK: srag [[REG:%r[0-5]]], %r2, 60 +; CHECK: risbg %r2, [[REG]], 59, 190, 0 +; CHECK: br %r14 + %shr = ashr i64 %foo, 60 + %and = and i64 %shr, 30 + ret i64 %and +} + +; Now try an arithmetic right shift in which the sign bits aren't needed. +; Introduce a second use of %shr so that the ashr doesn't decompose to +; an lshr. +define i32 @f21(i32 %foo, i32 *%dest) { +; CHECK-LABEL: f21: +; CHECK: risbg %r2, %r2, 60, 190, 36 +; CHECK: br %r14 + %shr = ashr i32 %foo, 28 + store i32 %shr, i32 *%dest + %and = and i32 %shr, 14 + ret i32 %and +} + +; ...and again with i64. +define i64 @f22(i64 %foo, i64 *%dest) { +; CHECK-LABEL: f22: +; CHECK: risbg %r2, %r2, 60, 190, 4 +; CHECK: br %r14 + %shr = ashr i64 %foo, 60 + store i64 %shr, i64 *%dest + %and = and i64 %shr, 14 + ret i64 %and +} + +; Check that we use RISBG for shifted values even if the AND is a +; natural zero extension. +define i64 @f23(i64 %foo) { +; CHECK-LABEL: f23: +; CHECK: risbg %r2, %r2, 56, 191, 62 +; CHECK: br %r14 + %shr = lshr i64 %foo, 2 + %and = and i64 %shr, 255 + ret i64 %and +} + +; Test a case where the AND comes before a rotate. This needs a separate +; mask and rotate. +define i32 @f24(i32 %foo) { +; CHECK-LABEL: f24: +; CHECK: nilf %r2, 14 +; CHECK: rll %r2, %r2, 3 +; CHECK: br %r14 + %and = and i32 %foo, 14 + %parta = shl i32 %and, 3 + %partb = lshr i32 %and, 29 + %rotl = or i32 %parta, %partb + ret i32 %rotl +} + +; ...and again with i64, where a single RISBG is enough. 
+define i64 @f25(i64 %foo) { +; CHECK-LABEL: f25: +; CHECK: risbg %r2, %r2, 57, 187, 3 +; CHECK: br %r14 + %and = and i64 %foo, 14 + %parta = shl i64 %and, 3 + %partb = lshr i64 %and, 61 + %rotl = or i64 %parta, %partb + ret i64 %rotl +} + +; Test a wrap-around case in which the AND comes before a rotate. +; This again needs a separate mask and rotate. +define i32 @f26(i32 %foo) { +; CHECK-LABEL: f26: +; CHECK: nill %r2, 65487 +; CHECK: rll %r2, %r2, 5 +; CHECK: br %r14 + %and = and i32 %foo, -49 + %parta = shl i32 %and, 5 + %partb = lshr i32 %and, 27 + %rotl = or i32 %parta, %partb + ret i32 %rotl +} + +; ...and again with i64, where a single RISBG is OK. +define i64 @f27(i64 %foo) { +; CHECK-LABEL: f27: +; CHECK: risbg %r2, %r2, 55, 180, 5 +; CHECK: br %r14 + %and = and i64 %foo, -49 + %parta = shl i64 %and, 5 + %partb = lshr i64 %and, 59 + %rotl = or i64 %parta, %partb + ret i64 %rotl +} + +; Test a case where the AND comes before a shift left. +define i32 @f28(i32 %foo) { +; CHECK-LABEL: f28: +; CHECK: risbg %r2, %r2, 32, 173, 17 +; CHECK: br %r14 + %and = and i32 %foo, 32766 + %shl = shl i32 %and, 17 + ret i32 %shl +} + +; ...and again with i64. +define i64 @f29(i64 %foo) { +; CHECK-LABEL: f29: +; CHECK: risbg %r2, %r2, 0, 141, 49 +; CHECK: br %r14 + %and = and i64 %foo, 32766 + %shl = shl i64 %and, 49 + ret i64 %shl +} + +; Test the next shift up from f28, in which the mask should get shortened. +define i32 @f30(i32 %foo) { +; CHECK-LABEL: f30: +; CHECK: risbg %r2, %r2, 32, 172, 18 +; CHECK: br %r14 + %and = and i32 %foo, 32766 + %shl = shl i32 %and, 18 + ret i32 %shl +} + +; ...and again with i64. +define i64 @f31(i64 %foo) { +; CHECK-LABEL: f31: +; CHECK: risbg %r2, %r2, 0, 140, 50 +; CHECK: br %r14 + %and = and i64 %foo, 32766 + %shl = shl i64 %and, 50 + ret i64 %shl +} + +; Test a wrap-around case in which the shift left comes after the AND. +; We can't use RISBG for the shift in that case. 
+define i32 @f32(i32 %foo) { +; CHECK-LABEL: f32: +; CHECK: sll %r2 +; CHECK: br %r14 + %and = and i32 %foo, -7 + %shl = shl i32 %and, 10 + ret i32 %shl +} + +; ...and again with i64. +define i64 @f33(i64 %foo) { +; CHECK-LABEL: f33: +; CHECK: sllg %r2 +; CHECK: br %r14 + %and = and i64 %foo, -7 + %shl = shl i64 %and, 10 + ret i64 %shl +} + +; Test a case where the AND comes before a shift right. +define i32 @f34(i32 %foo) { +; CHECK-LABEL: f34: +; CHECK: risbg %r2, %r2, 57, 191, 55 +; CHECK: br %r14 + %and = and i32 %foo, 65535 + %shl = lshr i32 %and, 9 + ret i32 %shl +} + +; ...and again with i64. +define i64 @f35(i64 %foo) { +; CHECK-LABEL: f35: +; CHECK: risbg %r2, %r2, 57, 191, 55 +; CHECK: br %r14 + %and = and i64 %foo, 65535 + %shl = lshr i64 %and, 9 + ret i64 %shl +} + +; Test a wrap-around case where the AND comes before a shift right. +; We can't use RISBG for the shift in that case. +define i32 @f36(i32 %foo) { +; CHECK-LABEL: f36: +; CHECK: srl %r2 +; CHECK: br %r14 + %and = and i32 %foo, -25 + %shl = lshr i32 %and, 1 + ret i32 %shl +} + +; ...and again with i64. +define i64 @f37(i64 %foo) { +; CHECK-LABEL: f37: +; CHECK: srlg %r2 +; CHECK: br %r14 + %and = and i64 %foo, -25 + %shl = lshr i64 %and, 1 + ret i64 %shl +} + +; Test a combination involving a large ASHR and a shift left. We can't +; use RISBG there. +define i64 @f38(i64 %foo) { +; CHECK-LABEL: f38: +; CHECK: srag {{%r[0-5]}} +; CHECK: sllg {{%r[0-5]}} +; CHECK: br %r14 + %ashr = ashr i64 %foo, 32 + %shl = shl i64 %ashr, 5 + ret i64 %shl +} + +; Try a similar thing in which no shifted sign bits are kept. +define i64 @f39(i64 %foo, i64 *%dest) { +; CHECK-LABEL: f39: +; CHECK: srag [[REG:%r[01345]]], %r2, 35 +; CHECK: risbg %r2, %r2, 33, 189, 31 +; CHECK: br %r14 + %ashr = ashr i64 %foo, 35 + store i64 %ashr, i64 *%dest + %shl = shl i64 %ashr, 2 + %and = and i64 %shl, 2147483647 + ret i64 %and +} + +; ...and again with the next highest shift value, where one sign bit is kept. 
+define i64 @f40(i64 %foo, i64 *%dest) { +; CHECK-LABEL: f40: +; CHECK: srag [[REG:%r[01345]]], %r2, 36 +; CHECK: risbg %r2, [[REG]], 33, 189, 2 +; CHECK: br %r14 + %ashr = ashr i64 %foo, 36 + store i64 %ashr, i64 *%dest + %shl = shl i64 %ashr, 2 + %and = and i64 %shl, 2147483647 + ret i64 %and +} + +; In this case the sign extension is converted to a pair of 32-bit shifts, +; which is then extended to 64 bits. We previously used the wrong bit size +; when testing whether the shifted-in bits of the shift right were significant. +define i64 @f41(i1 %x) { +; CHECK-LABEL: f41: +; CHECK: sll %r2, 31 +; CHECK: sra %r2, 31 +; CHECK: llgcr %r2, %r2 +; CHECK: br %r14 + %ext = sext i1 %x to i8 + %ext2 = zext i8 %ext to i64 + ret i64 %ext2 +} diff --git a/test/CodeGen/SystemZ/risbg-02.ll b/test/CodeGen/SystemZ/risbg-02.ll new file mode 100644 index 0000000000000..5ccfab028b025 --- /dev/null +++ b/test/CodeGen/SystemZ/risbg-02.ll @@ -0,0 +1,93 @@ +; Test sequences that can use RISBG with a normal first operand. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test a case with two ANDs. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: risbg %r2, %r3, 60, 62, 0 +; CHECK: br %r14 + %anda = and i32 %a, -15 + %andb = and i32 %b, 14 + %or = or i32 %anda, %andb + ret i32 %or +} + +; ...and again with i64. +define i64 @f2(i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: risbg %r2, %r3, 60, 62, 0 +; CHECK: br %r14 + %anda = and i64 %a, -15 + %andb = and i64 %b, 14 + %or = or i64 %anda, %andb + ret i64 %or +} + +; Test a case with two ANDs and a shift. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: risbg %r2, %r3, 60, 63, 56 +; CHECK: br %r14 + %anda = and i32 %a, -16 + %shr = lshr i32 %b, 8 + %andb = and i32 %shr, 15 + %or = or i32 %anda, %andb + ret i32 %or +} + +; ...and again with i64. 
+define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: risbg %r2, %r3, 60, 63, 56 +; CHECK: br %r14 + %anda = and i64 %a, -16 + %shr = lshr i64 %b, 8 + %andb = and i64 %shr, 15 + %or = or i64 %anda, %andb + ret i64 %or +} + +; Test a case with a single AND and a left shift. +define i32 @f5(i32 %a, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: risbg %r2, %r3, 32, 53, 10 +; CHECK: br %r14 + %anda = and i32 %a, 1023 + %shlb = shl i32 %b, 10 + %or = or i32 %anda, %shlb + ret i32 %or +} + +; ...and again with i64. +define i64 @f6(i64 %a, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: risbg %r2, %r3, 0, 53, 10 +; CHECK: br %r14 + %anda = and i64 %a, 1023 + %shlb = shl i64 %b, 10 + %or = or i64 %anda, %shlb + ret i64 %or +} + +; Test a case with a single AND and a right shift. +define i32 @f7(i32 %a, i32 %b) { +; CHECK-LABEL: f7: +; CHECK: risbg %r2, %r3, 40, 63, 56 +; CHECK: br %r14 + %anda = and i32 %a, -16777216 + %shrb = lshr i32 %b, 8 + %or = or i32 %anda, %shrb + ret i32 %or +} + +; ...and again with i64. +define i64 @f8(i64 %a, i64 %b) { +; CHECK-LABEL: f8: +; CHECK: risbg %r2, %r3, 8, 63, 56 +; CHECK: br %r14 + %anda = and i64 %a, -72057594037927936 + %shrb = lshr i64 %b, 8 + %or = or i64 %anda, %shrb + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/rnsbg-01.ll b/test/CodeGen/SystemZ/rnsbg-01.ll new file mode 100644 index 0000000000000..666aeb21e8d80 --- /dev/null +++ b/test/CodeGen/SystemZ/rnsbg-01.ll @@ -0,0 +1,257 @@ +; Test sequences that can use RNSBG. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test a simple mask, which is a wrap-around case. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: rnsbg %r2, %r3, 59, 56, 0 +; CHECK: br %r14 + %orb = or i32 %b, 96 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. 
+define i64 @f2(i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: rnsbg %r2, %r3, 59, 56, 0 +; CHECK: br %r14 + %orb = or i64 %b, 96 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case where no wraparound is needed. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: rnsbg %r2, %r3, 58, 61, 0 +; CHECK: br %r14 + %orb = or i32 %b, -61 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: rnsbg %r2, %r3, 58, 61, 0 +; CHECK: br %r14 + %orb = or i64 %b, -61 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with just a left shift. This can't use RNSBG. +define i32 @f6(i32 %a, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: sll {{%r[0-5]}} +; CHECK: nr {{%r[0-5]}} +; CHECK: br %r14 + %shrb = shl i32 %b, 20 + %and = and i32 %a, %shrb + ret i32 %and +} + +; ...and again with i64. +define i64 @f7(i64 %a, i64 %b) { +; CHECK-LABEL: f7: +; CHECK: sllg {{%r[0-5]}} +; CHECK: ngr {{%r[0-5]}} +; CHECK: br %r14 + %shrb = shl i64 %b, 20 + %and = and i64 %a, %shrb + ret i64 %and +} + +; Test a case with just a rotate. This can't use RNSBG. +define i32 @f8(i32 %a, i32 %b) { +; CHECK-LABEL: f8: +; CHECK: rll {{%r[0-5]}} +; CHECK: nr {{%r[0-5]}} +; CHECK: br %r14 + %shlb = shl i32 %b, 22 + %shrb = lshr i32 %b, 10 + %rotlb = or i32 %shlb, %shrb + %and = and i32 %a, %rotlb + ret i32 %and +} + +; ...and again with i64, which can. +define i64 @f9(i64 %a, i64 %b) { +; CHECK-LABEL: f9: +; CHECK: rnsbg %r2, %r3, 0, 63, 44 +; CHECK: br %r14 + %shlb = shl i64 %b, 44 + %shrb = lshr i64 %b, 20 + %rotlb = or i64 %shlb, %shrb + %and = and i64 %a, %rotlb + ret i64 %and +} + +; Test a case with a left shift and OR, where the OR covers all shifted bits. +; We can do the whole thing using RNSBG. 
+define i32 @f10(i32 %a, i32 %b) { +; CHECK-LABEL: f10: +; CHECK: rnsbg %r2, %r3, 32, 56, 7 +; CHECK: br %r14 + %shlb = shl i32 %b, 7 + %orb = or i32 %shlb, 127 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f11(i64 %a, i64 %b) { +; CHECK-LABEL: f11: +; CHECK: rnsbg %r2, %r3, 0, 56, 7 +; CHECK: br %r14 + %shlb = shl i64 %b, 7 + %orb = or i64 %shlb, 127 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with a left shift and OR, where the OR doesn't cover all +; shifted bits. We can't use RNSBG for the shift, but we can for the OR +; and AND. +define i32 @f12(i32 %a, i32 %b) { +; CHECK-LABEL: f12: +; CHECK: sll %r3, 7 +; CHECK: rnsbg %r2, %r3, 32, 57, 0 +; CHECK: br %r14 + %shlb = shl i32 %b, 7 + %orb = or i32 %shlb, 63 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f13(i64 %a, i64 %b) { +; CHECK-LABEL: f13: +; CHECK: sllg [[REG:%r[01345]]], %r3, 7 +; CHECK: rnsbg %r2, [[REG]], 0, 57, 0 +; CHECK: br %r14 + %shlb = shl i64 %b, 7 + %orb = or i64 %shlb, 63 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with a right shift and OR, where the OR covers all the shifted +; bits. The whole thing can be done using RNSBG. +define i32 @f14(i32 %a, i32 %b) { +; CHECK-LABEL: f14: +; CHECK: rnsbg %r2, %r3, 60, 63, 37 +; CHECK: br %r14 + %shrb = lshr i32 %b, 27 + %orb = or i32 %shrb, -16 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f15(i64 %a, i64 %b) { +; CHECK-LABEL: f15: +; CHECK: rnsbg %r2, %r3, 60, 63, 5 +; CHECK: br %r14 + %shrb = lshr i64 %b, 59 + %orb = or i64 %shrb, -16 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with a right shift and OR, where the OR doesn't cover all the +; shifted bits. The shift needs to be done separately, but the OR and AND +; can use RNSBG. 
+define i32 @f16(i32 %a, i32 %b) { +; CHECK-LABEL: f16: +; CHECK: srl %r3, 29 +; CHECK: rnsbg %r2, %r3, 60, 63, 0 +; CHECK: br %r14 + %shrb = lshr i32 %b, 29 + %orb = or i32 %shrb, -16 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f17(i64 %a, i64 %b) { +; CHECK-LABEL: f17: +; CHECK: srlg [[REG:%r[01345]]], %r3, 61 +; CHECK: rnsbg %r2, [[REG]], 60, 63, 0 +; CHECK: br %r14 + %shrb = lshr i64 %b, 61 + %orb = or i64 %shrb, -16 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a combination involving an ASHR in which the sign bits matter. +; We can't use RNSBG for the ASHR in that case, but we can for the rest. +define i32 @f18(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f18: +; CHECK: sra %r3, 4 +; CHECK: rnsbg %r2, %r3, 32, 62, 1 +; CHECK: br %r14 + %ashrb = ashr i32 %b, 4 + store i32 %ashrb, i32 *%dest + %shlb = shl i32 %ashrb, 1 + %orb = or i32 %shlb, 1 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. +define i64 @f19(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f19: +; CHECK: srag [[REG:%r[0145]]], %r3, 34 +; CHECK: rnsbg %r2, [[REG]], 0, 62, 1 +; CHECK: br %r14 + %ashrb = ashr i64 %b, 34 + store i64 %ashrb, i64 *%dest + %shlb = shl i64 %ashrb, 1 + %orb = or i64 %shlb, 1 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a combination involving an ASHR in which the sign bits don't matter. +define i32 @f20(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f20: +; CHECK: rnsbg %r2, %r3, 48, 62, 48 +; CHECK: br %r14 + %ashrb = ashr i32 %b, 17 + store i32 %ashrb, i32 *%dest + %shlb = shl i32 %ashrb, 1 + %orb = or i32 %shlb, -65535 + %and = and i32 %a, %orb + ret i32 %and +} + +; ...and again with i64. 
+define i64 @f21(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f21: +; CHECK: rnsbg %r2, %r3, 48, 62, 16 +; CHECK: br %r14 + %ashrb = ashr i64 %b, 49 + store i64 %ashrb, i64 *%dest + %shlb = shl i64 %ashrb, 1 + %orb = or i64 %shlb, -65535 + %and = and i64 %a, %orb + ret i64 %and +} + +; Test a case with a shift, OR, and rotate where the OR covers all shifted bits. +define i64 @f22(i64 %a, i64 %b) { +; CHECK-LABEL: f22: +; CHECK: rnsbg %r2, %r3, 60, 54, 9 +; CHECK: br %r14 + %shlb = shl i64 %b, 5 + %orb = or i64 %shlb, 31 + %shlorb = shl i64 %orb, 4 + %shrorb = lshr i64 %orb, 60 + %rotlorb = or i64 %shlorb, %shrorb + %and = and i64 %a, %rotlorb + ret i64 %and +} diff --git a/test/CodeGen/SystemZ/rosbg-01.ll b/test/CodeGen/SystemZ/rosbg-01.ll new file mode 100644 index 0000000000000..0abacccba14ce --- /dev/null +++ b/test/CodeGen/SystemZ/rosbg-01.ll @@ -0,0 +1,110 @@ +; Test sequences that can use ROSBG. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the simple case. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: rosbg %r2, %r3, 59, 59, 0 +; CHECK: br %r14 + %andb = and i32 %b, 16 + %or = or i32 %a, %andb + ret i32 %or +} + +; ...and again with i64. +define i64 @f2(i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: rosbg %r2, %r3, 59, 59, 0 +; CHECK: br %r14 + %andb = and i64 %b, 16 + %or = or i64 %a, %andb + ret i64 %or +} + +; Test a case where wraparound is needed. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: rosbg %r2, %r3, 63, 60, 0 +; CHECK: br %r14 + %andb = and i32 %b, -7 + %or = or i32 %a, %andb + ret i32 %or +} + +; ...and again with i64. +define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: rosbg %r2, %r3, 63, 60, 0 +; CHECK: br %r14 + %andb = and i64 %b, -7 + %or = or i64 %a, %andb + ret i64 %or +} + +; Test a case with just a shift. 
+define i32 @f6(i32 %a, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: rosbg %r2, %r3, 32, 51, 12 +; CHECK: br %r14 + %shrb = shl i32 %b, 12 + %or = or i32 %a, %shrb + ret i32 %or +} + +; ...and again with i64. +define i64 @f7(i64 %a, i64 %b) { +; CHECK-LABEL: f7: +; CHECK: rosbg %r2, %r3, 0, 51, 12 +; CHECK: br %r14 + %shrb = shl i64 %b, 12 + %or = or i64 %a, %shrb + ret i64 %or +} + +; Test a case with just a rotate. This can't use ROSBG. +define i32 @f8(i32 %a, i32 %b) { +; CHECK-LABEL: f8: +; CHECK: rll {{%r[0-5]}} +; CHECK: or {{%r[0-5]}} +; CHECK: br %r14 + %shlb = shl i32 %b, 30 + %shrb = lshr i32 %b, 2 + %rotlb = or i32 %shlb, %shrb + %or = or i32 %a, %rotlb + ret i32 %or +} + +; ...and again with i64, which can. +define i64 @f9(i64 %a, i64 %b) { +; CHECK-LABEL: f9: +; CHECK: rosbg %r2, %r3, 0, 63, 47 +; CHECK: br %r14 + %shlb = shl i64 %b, 47 + %shrb = lshr i64 %b, 17 + %rotlb = or i64 %shlb, %shrb + %or = or i64 %a, %rotlb + ret i64 %or +} + +; Test a case with a shift and AND. +define i32 @f10(i32 %a, i32 %b) { +; CHECK-LABEL: f10: +; CHECK: rosbg %r2, %r3, 56, 59, 4 +; CHECK: br %r14 + %shrb = shl i32 %b, 4 + %andb = and i32 %shrb, 240 + %or = or i32 %a, %andb + ret i32 %or +} + +; ...and again with i64. +define i64 @f11(i64 %a, i64 %b) { +; CHECK-LABEL: f11: +; CHECK: rosbg %r2, %r3, 56, 59, 4 +; CHECK: br %r14 + %shrb = shl i64 %b, 4 + %andb = and i64 %shrb, 240 + %or = or i64 %a, %andb + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/rxsbg-01.ll b/test/CodeGen/SystemZ/rxsbg-01.ll new file mode 100644 index 0000000000000..5491bff2ecdc2 --- /dev/null +++ b/test/CodeGen/SystemZ/rxsbg-01.ll @@ -0,0 +1,112 @@ +; Test sequences that can use RXSBG. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the simple case. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: rxsbg %r2, %r3, 59, 59, 0 +; CHECK: br %r14 + %andb = and i32 %b, 16 + %xor = xor i32 %a, %andb + ret i32 %xor +} + +; ...and again with i64. 
+define i64 @f2(i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: rxsbg %r2, %r3, 59, 59, 0 +; CHECK: br %r14 + %andb = and i64 %b, 16 + %xor = xor i64 %a, %andb + ret i64 %xor +} + +; Test a case where wraparound is needed. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: rxsbg %r2, %r3, 63, 60, 0 +; CHECK: br %r14 + %andb = and i32 %b, -7 + %xor = xor i32 %a, %andb + ret i32 %xor +} + +; ...and again with i64. +define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: rxsbg %r2, %r3, 63, 60, 0 +; CHECK: br %r14 + %andb = and i64 %b, -7 + %xor = xor i64 %a, %andb + ret i64 %xor +} + +; Test a case with just a shift. +define i32 @f6(i32 %a, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: rxsbg %r2, %r3, 32, 51, 12 +; CHECK: br %r14 + %shlb = shl i32 %b, 12 + %xor = xor i32 %a, %shlb + ret i32 %xor +} + +; ...and again with i64. +define i64 @f7(i64 %a, i64 %b) { +; CHECK-LABEL: f7: +; CHECK: rxsbg %r2, %r3, 0, 51, 12 +; CHECK: br %r14 + %shlb = shl i64 %b, 12 + %xor = xor i64 %a, %shlb + ret i64 %xor +} + +; Test a case with just a rotate (using XOR for the rotate combination too, +; to test that this kind of rotate does get recognised by the target- +; independent code). This can't use RXSBG. +define i32 @f8(i32 %a, i32 %b) { +; CHECK-LABEL: f8: +; CHECK: rll {{%r[0-5]}} +; CHECK: xr {{%r[0-5]}} +; CHECK: br %r14 + %shlb = shl i32 %b, 30 + %shrb = lshr i32 %b, 2 + %rotlb = xor i32 %shlb, %shrb + %xor = xor i32 %a, %rotlb + ret i32 %xor +} + +; ...and again with i64, which can use RXSBG for the rotate. +define i64 @f9(i64 %a, i64 %b) { +; CHECK-LABEL: f9: +; CHECK: rxsbg %r2, %r3, 0, 63, 47 +; CHECK: br %r14 + %shlb = shl i64 %b, 47 + %shrb = lshr i64 %b, 17 + %rotlb = xor i64 %shlb, %shrb + %xor = xor i64 %a, %rotlb + ret i64 %xor +} + +; Test a case with a shift and AND. 
+define i32 @f10(i32 %a, i32 %b) { +; CHECK-LABEL: f10: +; CHECK: rxsbg %r2, %r3, 56, 59, 4 +; CHECK: br %r14 + %shlb = shl i32 %b, 4 + %andb = and i32 %shlb, 240 + %xor = xor i32 %a, %andb + ret i32 %xor +} + +; ...and again with i64. +define i64 @f11(i64 %a, i64 %b) { +; CHECK-LABEL: f11: +; CHECK: rxsbg %r2, %r3, 56, 59, 4 +; CHECK: br %r14 + %shlb = shl i64 %b, 4 + %andb = and i64 %shlb, 240 + %xor = xor i64 %a, %andb + ret i64 %xor +} diff --git a/test/CodeGen/SystemZ/setcc-01.ll b/test/CodeGen/SystemZ/setcc-01.ll new file mode 100644 index 0000000000000..4626760fa25bc --- /dev/null +++ b/test/CodeGen/SystemZ/setcc-01.ll @@ -0,0 +1,74 @@ +; Test SETCC for every integer condition. The tests here assume that +; RISBLG isn't available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; Test CC in { 0 }, with 3 don't care. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = icmp eq i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1 }, with 3 don't care. +define i32 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36 +; CHECK: br %r14 + %cond = icmp slt i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 1 }, with 3 don't care. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -536870912 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = icmp sle i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 2 }, with 3 don't care. +define i32 @f4(i32 %a, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %cond = icmp sgt i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 2 }, with 3 don't care. 
+define i32 @f5(i32 %a, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: xilf [[REG]], 4294967295 +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36 +; CHECK: br %r14 + %cond = icmp sge i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1, 2 }, with 3 don't care. +define i32 @f6(i32 %a, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, 1879048192 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = icmp ne i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/setcc-02.ll b/test/CodeGen/SystemZ/setcc-02.ll new file mode 100644 index 0000000000000..6a7be47a92b7c --- /dev/null +++ b/test/CodeGen/SystemZ/setcc-02.ll @@ -0,0 +1,174 @@ +; Test SETCC for every floating-point condition. The tests here assume that +; RISBLG isn't available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; Test CC in { 0 } +define i32 @f1(float %a, float %b) { +; CHECK-LABEL: f1: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp oeq float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1 } +define i32 @f2(float %a, float %b) { +; CHECK-LABEL: f2: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp olt float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 1 } +define i32 @f3(float %a, float %b) { +; CHECK-LABEL: f3: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -536870912 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp ole float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 2 } +define i32 @f4(float %a, float %b) { +; CHECK-LABEL: f4: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, 1342177280 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp ogt float %a, %b + %res = zext i1 
%cond to i32 + ret i32 %res +} + +; Test CC in { 0, 2 } +define i32 @f5(float %a, float %b) { +; CHECK-LABEL: f5: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: xilf [[REG]], 4294967295 +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36 +; CHECK: br %r14 + %cond = fcmp oge float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1, 2 } +define i32 @f6(float %a, float %b) { +; CHECK-LABEL: f6: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: afi [[REG]], 268435456 +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %cond = fcmp one float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 1, 2 } +define i32 @f7(float %a, float %b) { +; CHECK-LABEL: f7: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -805306368 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp ord float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 3 } +define i32 @f8(float %a, float %b) { +; CHECK-LABEL: f8: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, 1342177280 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp uno float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 3 } +define i32 @f9(float %a, float %b) { +; CHECK-LABEL: f9: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: afi [[REG]], -268435456 +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %cond = fcmp ueq float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1, 3 } +define i32 @f10(float %a, float %b) { +; CHECK-LABEL: f10: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36 +; CHECK: br %r14 + %cond = fcmp ult float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 1, 3 } +define i32 @f11(float %a, float %b) { +; CHECK-LABEL: f11: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, -805306368 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp ule float %a, %b + %res = zext i1 %cond to i32 + ret i32 
%res +} + +; Test CC in { 2, 3 } +define i32 @f12(float %a, float %b) { +; CHECK-LABEL: f12: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %cond = fcmp ugt float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 2, 3 } +define i32 @f13(float %a, float %b) { +; CHECK-LABEL: f13: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, 1879048192 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp uge float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1, 2, 3 } +define i32 @f14(float %a, float %b) { +; CHECK-LABEL: f14: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, 1879048192 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp une float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/shift-01.ll b/test/CodeGen/SystemZ/shift-01.ll index e5a459aaa8285..5dab36b379c4d 100644 --- a/test/CodeGen/SystemZ/shift-01.ll +++ b/test/CodeGen/SystemZ/shift-01.ll @@ -4,7 +4,7 @@ ; Check the low end of the SLL range. define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sll %r2, 1 ; CHECK: br %r14 %shift = shl i32 %a, 1 @@ -13,7 +13,7 @@ define i32 @f1(i32 %a) { ; Check the high end of the defined SLL range. define i32 @f2(i32 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sll %r2, 31 ; CHECK: br %r14 %shift = shl i32 %a, 31 @@ -22,7 +22,7 @@ define i32 @f2(i32 %a) { ; We don't generate shifts by out-of-range values. define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: sll %r2, 32 ; CHECK: br %r14 %shift = shl i32 %a, 32 @@ -31,7 +31,7 @@ define i32 @f3(i32 %a) { ; Make sure that we don't generate negative shift amounts. define i32 @f4(i32 %a, i32 %amt) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: sll %r2, -1{{.*}} ; CHECK: br %r14 %sub = sub i32 %amt, 1 @@ -41,7 +41,7 @@ define i32 @f4(i32 %a, i32 %amt) { ; Check variable shifts. 
define i32 @f5(i32 %a, i32 %amt) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sll %r2, 0(%r3) ; CHECK: br %r14 %shift = shl i32 %a, %amt @@ -50,7 +50,7 @@ define i32 @f5(i32 %a, i32 %amt) { ; Check shift amounts that have a constant term. define i32 @f6(i32 %a, i32 %amt) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sll %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -60,7 +60,7 @@ define i32 @f6(i32 %a, i32 %amt) { ; ...and again with a truncated 64-bit shift amount. define i32 @f7(i32 %a, i64 %amt) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sll %r2, 10(%r3) ; CHECK: br %r14 %add = add i64 %amt, 10 @@ -72,7 +72,7 @@ define i32 @f7(i32 %a, i64 %amt) { ; Check shift amounts that have the largest in-range constant term. We could ; mask the amount instead. define i32 @f8(i32 %a, i32 %amt) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sll %r2, 4095(%r3) ; CHECK: br %r14 %add = add i32 %amt, 4095 @@ -82,7 +82,7 @@ define i32 @f8(i32 %a, i32 %amt) { ; Check the next value up. Again, we could mask the amount instead. define i32 @f9(i32 %a, i32 %amt) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ahi %r3, 4096 ; CHECK: sll %r2, 0(%r3) ; CHECK: br %r14 @@ -93,7 +93,7 @@ define i32 @f9(i32 %a, i32 %amt) { ; Check that we don't try to generate "indexed" shifts. define i32 @f10(i32 %a, i32 %b, i32 %c) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ar {{%r3, %r4|%r4, %r3}} ; CHECK: sll %r2, 0({{%r[34]}}) ; CHECK: br %r14 @@ -104,7 +104,7 @@ define i32 @f10(i32 %a, i32 %b, i32 %c) { ; Check that the shift amount uses an address register. It cannot be in %r0. define i32 @f11(i32 %a, i32 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: l %r1, 0(%r3) ; CHECK: sll %r2, 0(%r1) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/shift-02.ll b/test/CodeGen/SystemZ/shift-02.ll index 38093a8ff7a0f..27e73cd3a1f88 100644 --- a/test/CodeGen/SystemZ/shift-02.ll +++ b/test/CodeGen/SystemZ/shift-02.ll @@ -4,7 +4,7 @@ ; Check the low end of the SRL range. 
define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: srl %r2, 1 ; CHECK: br %r14 %shift = lshr i32 %a, 1 @@ -13,7 +13,7 @@ define i32 @f1(i32 %a) { ; Check the high end of the defined SRL range. define i32 @f2(i32 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: srl %r2, 31 ; CHECK: br %r14 %shift = lshr i32 %a, 31 @@ -22,7 +22,7 @@ define i32 @f2(i32 %a) { ; We don't generate shifts by out-of-range values. define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: srl %r2, 32 ; CHECK: br %r14 %shift = lshr i32 %a, 32 @@ -31,7 +31,7 @@ define i32 @f3(i32 %a) { ; Make sure that we don't generate negative shift amounts. define i32 @f4(i32 %a, i32 %amt) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: srl %r2, -1{{.*}} ; CHECK: br %r14 %sub = sub i32 %amt, 1 @@ -41,7 +41,7 @@ define i32 @f4(i32 %a, i32 %amt) { ; Check variable shifts. define i32 @f5(i32 %a, i32 %amt) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: srl %r2, 0(%r3) ; CHECK: br %r14 %shift = lshr i32 %a, %amt @@ -50,7 +50,7 @@ define i32 @f5(i32 %a, i32 %amt) { ; Check shift amounts that have a constant term. define i32 @f6(i32 %a, i32 %amt) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: srl %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -60,7 +60,7 @@ define i32 @f6(i32 %a, i32 %amt) { ; ...and again with a truncated 64-bit shift amount. define i32 @f7(i32 %a, i64 %amt) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: srl %r2, 10(%r3) ; CHECK: br %r14 %add = add i64 %amt, 10 @@ -72,7 +72,7 @@ define i32 @f7(i32 %a, i64 %amt) { ; Check shift amounts that have the largest in-range constant term. We could ; mask the amount instead. define i32 @f8(i32 %a, i32 %amt) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: srl %r2, 4095(%r3) ; CHECK: br %r14 %add = add i32 %amt, 4095 @@ -82,7 +82,7 @@ define i32 @f8(i32 %a, i32 %amt) { ; Check the next value up. Again, we could mask the amount instead. 
define i32 @f9(i32 %a, i32 %amt) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ahi %r3, 4096 ; CHECK: srl %r2, 0(%r3) ; CHECK: br %r14 @@ -93,7 +93,7 @@ define i32 @f9(i32 %a, i32 %amt) { ; Check that we don't try to generate "indexed" shifts. define i32 @f10(i32 %a, i32 %b, i32 %c) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ar {{%r3, %r4|%r4, %r3}} ; CHECK: srl %r2, 0({{%r[34]}}) ; CHECK: br %r14 @@ -104,7 +104,7 @@ define i32 @f10(i32 %a, i32 %b, i32 %c) { ; Check that the shift amount uses an address register. It cannot be in %r0. define i32 @f11(i32 %a, i32 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: l %r1, 0(%r3) ; CHECK: srl %r2, 0(%r1) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/shift-03.ll b/test/CodeGen/SystemZ/shift-03.ll index ca510f3c429b2..c45ae48b4071b 100644 --- a/test/CodeGen/SystemZ/shift-03.ll +++ b/test/CodeGen/SystemZ/shift-03.ll @@ -4,7 +4,7 @@ ; Check the low end of the SRA range. define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sra %r2, 1 ; CHECK: br %r14 %shift = ashr i32 %a, 1 @@ -13,7 +13,7 @@ define i32 @f1(i32 %a) { ; Check the high end of the defined SRA range. define i32 @f2(i32 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sra %r2, 31 ; CHECK: br %r14 %shift = ashr i32 %a, 31 @@ -22,7 +22,7 @@ define i32 @f2(i32 %a) { ; We don't generate shifts by out-of-range values. define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: sra %r2, 32 ; CHECK: br %r14 %shift = ashr i32 %a, 32 @@ -31,7 +31,7 @@ define i32 @f3(i32 %a) { ; Make sure that we don't generate negative shift amounts. define i32 @f4(i32 %a, i32 %amt) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK-NOT: sra %r2, -1{{.*}} ; CHECK: br %r14 %sub = sub i32 %amt, 1 @@ -41,7 +41,7 @@ define i32 @f4(i32 %a, i32 %amt) { ; Check variable shifts. 
define i32 @f5(i32 %a, i32 %amt) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sra %r2, 0(%r3) ; CHECK: br %r14 %shift = ashr i32 %a, %amt @@ -50,7 +50,7 @@ define i32 @f5(i32 %a, i32 %amt) { ; Check shift amounts that have a constant term. define i32 @f6(i32 %a, i32 %amt) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sra %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -60,7 +60,7 @@ define i32 @f6(i32 %a, i32 %amt) { ; ...and again with a truncated 64-bit shift amount. define i32 @f7(i32 %a, i64 %amt) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sra %r2, 10(%r3) ; CHECK: br %r14 %add = add i64 %amt, 10 @@ -72,7 +72,7 @@ define i32 @f7(i32 %a, i64 %amt) { ; Check shift amounts that have the largest in-range constant term. We could ; mask the amount instead. define i32 @f8(i32 %a, i32 %amt) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sra %r2, 4095(%r3) ; CHECK: br %r14 %add = add i32 %amt, 4095 @@ -82,7 +82,7 @@ define i32 @f8(i32 %a, i32 %amt) { ; Check the next value up. Again, we could mask the amount instead. define i32 @f9(i32 %a, i32 %amt) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: ahi %r3, 4096 ; CHECK: sra %r2, 0(%r3) ; CHECK: br %r14 @@ -93,7 +93,7 @@ define i32 @f9(i32 %a, i32 %amt) { ; Check that we don't try to generate "indexed" shifts. define i32 @f10(i32 %a, i32 %b, i32 %c) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: ar {{%r3, %r4|%r4, %r3}} ; CHECK: sra %r2, 0({{%r[34]}}) ; CHECK: br %r14 @@ -104,7 +104,7 @@ define i32 @f10(i32 %a, i32 %b, i32 %c) { ; Check that the shift amount uses an address register. It cannot be in %r0. define i32 @f11(i32 %a, i32 *%ptr) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: l %r1, 0(%r3) ; CHECK: sra %r2, 0(%r1) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/shift-04.ll b/test/CodeGen/SystemZ/shift-04.ll index 0146a86ee0627..04b39d002c5da 100644 --- a/test/CodeGen/SystemZ/shift-04.ll +++ b/test/CodeGen/SystemZ/shift-04.ll @@ -4,7 +4,7 @@ ; Check the low end of the RLL range. 
define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: rll %r2, %r2, 1 ; CHECK: br %r14 %parta = shl i32 %a, 1 @@ -15,7 +15,7 @@ define i32 @f1(i32 %a) { ; Check the high end of the defined RLL range. define i32 @f2(i32 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: rll %r2, %r2, 31 ; CHECK: br %r14 %parta = shl i32 %a, 31 @@ -26,7 +26,7 @@ define i32 @f2(i32 %a) { ; We don't generate shifts by out-of-range values. define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: rll ; CHECK: br %r14 %parta = shl i32 %a, 32 @@ -37,7 +37,7 @@ define i32 @f3(i32 %a) { ; Check variable shifts. define i32 @f4(i32 %a, i32 %amt) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: rll %r2, %r2, 0(%r3) ; CHECK: br %r14 %amtb = sub i32 32, %amt @@ -49,7 +49,7 @@ define i32 @f4(i32 %a, i32 %amt) { ; Check shift amounts that have a constant term. define i32 @f5(i32 %a, i32 %amt) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: rll %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -62,7 +62,7 @@ define i32 @f5(i32 %a, i32 %amt) { ; ...and again with a truncated 64-bit shift amount. define i32 @f6(i32 %a, i64 %amt) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: rll %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i64 %amt, 10 @@ -76,7 +76,7 @@ define i32 @f6(i32 %a, i64 %amt) { ; ...and again with a different truncation representation. define i32 @f7(i32 %a, i64 %amt) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: rll %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i64 %amt, 10 @@ -92,7 +92,7 @@ define i32 @f7(i32 %a, i64 %amt) { ; Check shift amounts that have the largest in-range constant term. We could ; mask the amount instead. define i32 @f8(i32 %a, i32 %amt) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: rll %r2, %r2, 524287(%r3) ; CHECK: br %r14 %add = add i32 %amt, 524287 @@ -106,7 +106,7 @@ define i32 @f8(i32 %a, i32 %amt) { ; Check the next value up, which without masking must use a separate ; addition. 
define i32 @f9(i32 %a, i32 %amt) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: afi %r3, 524288 ; CHECK: rll %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -120,7 +120,7 @@ define i32 @f9(i32 %a, i32 %amt) { ; Check cases where 1 is subtracted from the shift amount. define i32 @f10(i32 %a, i32 %amt) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: rll %r2, %r2, -1(%r3) ; CHECK: br %r14 %suba = sub i32 %amt, 1 @@ -134,7 +134,7 @@ define i32 @f10(i32 %a, i32 %amt) { ; Check the lowest value that can be subtracted from the shift amount. ; Again, we could mask the shift amount instead. define i32 @f11(i32 %a, i32 %amt) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: rll %r2, %r2, -524288(%r3) ; CHECK: br %r14 %suba = sub i32 %amt, 524288 @@ -148,7 +148,7 @@ define i32 @f11(i32 %a, i32 %amt) { ; Check the next value down, which without masking must use a separate ; addition. define i32 @f12(i32 %a, i32 %amt) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: afi %r3, -524289 ; CHECK: rll %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -162,7 +162,7 @@ define i32 @f12(i32 %a, i32 %amt) { ; Check that we don't try to generate "indexed" shifts. define i32 @f13(i32 %a, i32 %b, i32 %c) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: ar {{%r3, %r4|%r4, %r3}} ; CHECK: rll %r2, %r2, 0({{%r[34]}}) ; CHECK: br %r14 @@ -176,7 +176,7 @@ define i32 @f13(i32 %a, i32 %b, i32 %c) { ; Check that the shift amount uses an address register. It cannot be in %r0. define i32 @f14(i32 %a, i32 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: l %r1, 0(%r3) ; CHECK: rll %r2, %r2, 0(%r1) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/shift-05.ll b/test/CodeGen/SystemZ/shift-05.ll index 8c0ca9381bcbb..833b2fbae1e5d 100644 --- a/test/CodeGen/SystemZ/shift-05.ll +++ b/test/CodeGen/SystemZ/shift-05.ll @@ -4,7 +4,7 @@ ; Check the low end of the SLLG range. 
define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: sllg %r2, %r2, 1 ; CHECK: br %r14 %shift = shl i64 %a, 1 @@ -13,7 +13,7 @@ define i64 @f1(i64 %a) { ; Check the high end of the defined SLLG range. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: sllg %r2, %r2, 63 ; CHECK: br %r14 %shift = shl i64 %a, 63 @@ -22,7 +22,7 @@ define i64 @f2(i64 %a) { ; We don't generate shifts by out-of-range values. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: sllg ; CHECK: br %r14 %shift = shl i64 %a, 64 @@ -31,7 +31,7 @@ define i64 @f3(i64 %a) { ; Check variable shifts. define i64 @f4(i64 %a, i64 %amt) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: sllg %r2, %r2, 0(%r3) ; CHECK: br %r14 %shift = shl i64 %a, %amt @@ -40,7 +40,7 @@ define i64 @f4(i64 %a, i64 %amt) { ; Check shift amounts that have a constant term. define i64 @f5(i64 %a, i64 %amt) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: sllg %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i64 %amt, 10 @@ -50,7 +50,7 @@ define i64 @f5(i64 %a, i64 %amt) { ; ...and again with a sign-extended 32-bit shift amount. define i64 @f6(i64 %a, i32 %amt) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: sllg %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -61,7 +61,7 @@ define i64 @f6(i64 %a, i32 %amt) { ; ...and now with a zero-extended 32-bit shift amount. define i64 @f7(i64 %a, i32 %amt) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: sllg %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -73,7 +73,7 @@ define i64 @f7(i64 %a, i32 %amt) { ; Check shift amounts that have the largest in-range constant term. We could ; mask the amount instead. define i64 @f8(i64 %a, i64 %amt) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: sllg %r2, %r2, 524287(%r3) ; CHECK: br %r14 %add = add i64 %amt, 524287 @@ -84,7 +84,7 @@ define i64 @f8(i64 %a, i64 %amt) { ; Check the next value up, which without masking must use a separate ; addition. 
define i64 @f9(i64 %a, i64 %amt) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: a{{g?}}fi %r3, 524288 ; CHECK: sllg %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -95,7 +95,7 @@ define i64 @f9(i64 %a, i64 %amt) { ; Check cases where 1 is subtracted from the shift amount. define i64 @f10(i64 %a, i64 %amt) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: sllg %r2, %r2, -1(%r3) ; CHECK: br %r14 %sub = sub i64 %amt, 1 @@ -106,7 +106,7 @@ define i64 @f10(i64 %a, i64 %amt) { ; Check the lowest value that can be subtracted from the shift amount. ; Again, we could mask the shift amount instead. define i64 @f11(i64 %a, i64 %amt) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: sllg %r2, %r2, -524288(%r3) ; CHECK: br %r14 %sub = sub i64 %amt, 524288 @@ -117,7 +117,7 @@ define i64 @f11(i64 %a, i64 %amt) { ; Check the next value down, which without masking must use a separate ; addition. define i64 @f12(i64 %a, i64 %amt) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: a{{g?}}fi %r3, -524289 ; CHECK: sllg %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -128,7 +128,7 @@ define i64 @f12(i64 %a, i64 %amt) { ; Check that we don't try to generate "indexed" shifts. define i64 @f13(i64 %a, i64 %b, i64 %c) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}} ; CHECK: sllg %r2, %r2, 0({{%r[34]}}) ; CHECK: br %r14 @@ -139,7 +139,7 @@ define i64 @f13(i64 %a, i64 %b, i64 %c) { ; Check that the shift amount uses an address register. It cannot be in %r0. define i64 @f14(i64 %a, i64 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: l %r1, 4(%r3) ; CHECK: sllg %r2, %r2, 0(%r1) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/shift-06.ll b/test/CodeGen/SystemZ/shift-06.ll index 5f600b45a8845..74cae1213a3e6 100644 --- a/test/CodeGen/SystemZ/shift-06.ll +++ b/test/CodeGen/SystemZ/shift-06.ll @@ -4,7 +4,7 @@ ; Check the low end of the SRLG range. 
define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: srlg %r2, %r2, 1 ; CHECK: br %r14 %shift = lshr i64 %a, 1 @@ -13,7 +13,7 @@ define i64 @f1(i64 %a) { ; Check the high end of the defined SRLG range. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: srlg %r2, %r2, 63 ; CHECK: br %r14 %shift = lshr i64 %a, 63 @@ -22,7 +22,7 @@ define i64 @f2(i64 %a) { ; We don't generate shifts by out-of-range values. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: srlg ; CHECK: br %r14 %shift = lshr i64 %a, 64 @@ -31,7 +31,7 @@ define i64 @f3(i64 %a) { ; Check variable shifts. define i64 @f4(i64 %a, i64 %amt) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: srlg %r2, %r2, 0(%r3) ; CHECK: br %r14 %shift = lshr i64 %a, %amt @@ -40,7 +40,7 @@ define i64 @f4(i64 %a, i64 %amt) { ; Check shift amounts that have a constant term. define i64 @f5(i64 %a, i64 %amt) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: srlg %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i64 %amt, 10 @@ -50,7 +50,7 @@ define i64 @f5(i64 %a, i64 %amt) { ; ...and again with a sign-extended 32-bit shift amount. define i64 @f6(i64 %a, i32 %amt) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: srlg %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -61,7 +61,7 @@ define i64 @f6(i64 %a, i32 %amt) { ; ...and now with a zero-extended 32-bit shift amount. define i64 @f7(i64 %a, i32 %amt) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: srlg %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -73,7 +73,7 @@ define i64 @f7(i64 %a, i32 %amt) { ; Check shift amounts that have the largest in-range constant term. We could ; mask the amount instead. define i64 @f8(i64 %a, i64 %amt) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: srlg %r2, %r2, 524287(%r3) ; CHECK: br %r14 %add = add i64 %amt, 524287 @@ -84,7 +84,7 @@ define i64 @f8(i64 %a, i64 %amt) { ; Check the next value up, which without masking must use a separate ; addition. 
define i64 @f9(i64 %a, i64 %amt) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: a{{g?}}fi %r3, 524288 ; CHECK: srlg %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -95,7 +95,7 @@ define i64 @f9(i64 %a, i64 %amt) { ; Check cases where 1 is subtracted from the shift amount. define i64 @f10(i64 %a, i64 %amt) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: srlg %r2, %r2, -1(%r3) ; CHECK: br %r14 %sub = sub i64 %amt, 1 @@ -106,7 +106,7 @@ define i64 @f10(i64 %a, i64 %amt) { ; Check the lowest value that can be subtracted from the shift amount. ; Again, we could mask the shift amount instead. define i64 @f11(i64 %a, i64 %amt) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: srlg %r2, %r2, -524288(%r3) ; CHECK: br %r14 %sub = sub i64 %amt, 524288 @@ -117,7 +117,7 @@ define i64 @f11(i64 %a, i64 %amt) { ; Check the next value down, which without masking must use a separate ; addition. define i64 @f12(i64 %a, i64 %amt) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: a{{g?}}fi %r3, -524289 ; CHECK: srlg %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -128,7 +128,7 @@ define i64 @f12(i64 %a, i64 %amt) { ; Check that we don't try to generate "indexed" shifts. define i64 @f13(i64 %a, i64 %b, i64 %c) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}} ; CHECK: srlg %r2, %r2, 0({{%r[34]}}) ; CHECK: br %r14 @@ -139,7 +139,7 @@ define i64 @f13(i64 %a, i64 %b, i64 %c) { ; Check that the shift amount uses an address register. It cannot be in %r0. define i64 @f14(i64 %a, i64 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: l %r1, 4(%r3) ; CHECK: srlg %r2, %r2, 0(%r1) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/shift-07.ll b/test/CodeGen/SystemZ/shift-07.ll index ef583e8f3f0dc..712849df8ad14 100644 --- a/test/CodeGen/SystemZ/shift-07.ll +++ b/test/CodeGen/SystemZ/shift-07.ll @@ -4,7 +4,7 @@ ; Check the low end of the SRAG range. 
define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: srag %r2, %r2, 1 ; CHECK: br %r14 %shift = ashr i64 %a, 1 @@ -13,7 +13,7 @@ define i64 @f1(i64 %a) { ; Check the high end of the defined SRAG range. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: srag %r2, %r2, 63 ; CHECK: br %r14 %shift = ashr i64 %a, 63 @@ -22,7 +22,7 @@ define i64 @f2(i64 %a) { ; We don't generate shifts by out-of-range values. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: srag ; CHECK: br %r14 %shift = ashr i64 %a, 64 @@ -31,7 +31,7 @@ define i64 @f3(i64 %a) { ; Check variable shifts. define i64 @f4(i64 %a, i64 %amt) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: srag %r2, %r2, 0(%r3) ; CHECK: br %r14 %shift = ashr i64 %a, %amt @@ -40,7 +40,7 @@ define i64 @f4(i64 %a, i64 %amt) { ; Check shift amounts that have a constant term. define i64 @f5(i64 %a, i64 %amt) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: srag %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i64 %amt, 10 @@ -50,7 +50,7 @@ define i64 @f5(i64 %a, i64 %amt) { ; ...and again with a sign-extended 32-bit shift amount. define i64 @f6(i64 %a, i32 %amt) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: srag %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -61,7 +61,7 @@ define i64 @f6(i64 %a, i32 %amt) { ; ...and now with a zero-extended 32-bit shift amount. define i64 @f7(i64 %a, i32 %amt) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: srag %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -73,7 +73,7 @@ define i64 @f7(i64 %a, i32 %amt) { ; Check shift amounts that have the largest in-range constant term. We could ; mask the amount instead. define i64 @f8(i64 %a, i64 %amt) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: srag %r2, %r2, 524287(%r3) ; CHECK: br %r14 %add = add i64 %amt, 524287 @@ -84,7 +84,7 @@ define i64 @f8(i64 %a, i64 %amt) { ; Check the next value up, which without masking must use a separate ; addition. 
define i64 @f9(i64 %a, i64 %amt) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: a{{g?}}fi %r3, 524288 ; CHECK: srag %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -95,7 +95,7 @@ define i64 @f9(i64 %a, i64 %amt) { ; Check cases where 1 is subtracted from the shift amount. define i64 @f10(i64 %a, i64 %amt) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: srag %r2, %r2, -1(%r3) ; CHECK: br %r14 %sub = sub i64 %amt, 1 @@ -106,7 +106,7 @@ define i64 @f10(i64 %a, i64 %amt) { ; Check the lowest value that can be subtracted from the shift amount. ; Again, we could mask the shift amount instead. define i64 @f11(i64 %a, i64 %amt) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: srag %r2, %r2, -524288(%r3) ; CHECK: br %r14 %sub = sub i64 %amt, 524288 @@ -117,7 +117,7 @@ define i64 @f11(i64 %a, i64 %amt) { ; Check the next value down, which without masking must use a separate ; addition. define i64 @f12(i64 %a, i64 %amt) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: a{{g?}}fi %r3, -524289 ; CHECK: srag %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -128,7 +128,7 @@ define i64 @f12(i64 %a, i64 %amt) { ; Check that we don't try to generate "indexed" shifts. define i64 @f13(i64 %a, i64 %b, i64 %c) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}} ; CHECK: srag %r2, %r2, 0({{%r[34]}}) ; CHECK: br %r14 @@ -139,7 +139,7 @@ define i64 @f13(i64 %a, i64 %b, i64 %c) { ; Check that the shift amount uses an address register. It cannot be in %r0. define i64 @f14(i64 %a, i64 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: l %r1, 4(%r3) ; CHECK: srag %r2, %r2, 0(%r1) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/shift-08.ll b/test/CodeGen/SystemZ/shift-08.ll index 0688a0671671e..47283b50221cf 100644 --- a/test/CodeGen/SystemZ/shift-08.ll +++ b/test/CodeGen/SystemZ/shift-08.ll @@ -4,7 +4,7 @@ ; Check the low end of the RLLG range. 
define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: rllg %r2, %r2, 1 ; CHECK: br %r14 %parta = shl i64 %a, 1 @@ -15,7 +15,7 @@ define i64 @f1(i64 %a) { ; Check the high end of the defined RLLG range. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: rllg %r2, %r2, 63 ; CHECK: br %r14 %parta = shl i64 %a, 63 @@ -26,7 +26,7 @@ define i64 @f2(i64 %a) { ; We don't generate shifts by out-of-range values. define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK-NOT: rllg ; CHECK: br %r14 %parta = shl i64 %a, 64 @@ -37,7 +37,7 @@ define i64 @f3(i64 %a) { ; Check variable shifts. define i64 @f4(i64 %a, i64 %amt) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: rllg %r2, %r2, 0(%r3) ; CHECK: br %r14 %amtb = sub i64 64, %amt @@ -49,7 +49,7 @@ define i64 @f4(i64 %a, i64 %amt) { ; Check shift amounts that have a constant term. define i64 @f5(i64 %a, i64 %amt) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: rllg %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i64 %amt, 10 @@ -62,7 +62,7 @@ define i64 @f5(i64 %a, i64 %amt) { ; ...and again with a sign-extended 32-bit shift amount. define i64 @f6(i64 %a, i32 %amt) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: rllg %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -77,7 +77,7 @@ define i64 @f6(i64 %a, i32 %amt) { ; ...and now with a zero-extended 32-bit shift amount. define i64 @f7(i64 %a, i32 %amt) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: rllg %r2, %r2, 10(%r3) ; CHECK: br %r14 %add = add i32 %amt, 10 @@ -93,7 +93,7 @@ define i64 @f7(i64 %a, i32 %amt) { ; Check shift amounts that have the largest in-range constant term. We could ; mask the amount instead. define i64 @f8(i64 %a, i64 %amt) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: rllg %r2, %r2, 524287(%r3) ; CHECK: br %r14 %add = add i64 %amt, 524287 @@ -107,7 +107,7 @@ define i64 @f8(i64 %a, i64 %amt) { ; Check the next value up, which without masking must use a separate ; addition. 
define i64 @f9(i64 %a, i64 %amt) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: a{{g?}}fi %r3, 524288 ; CHECK: rllg %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -121,7 +121,7 @@ define i64 @f9(i64 %a, i64 %amt) { ; Check cases where 1 is subtracted from the shift amount. define i64 @f10(i64 %a, i64 %amt) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: rllg %r2, %r2, -1(%r3) ; CHECK: br %r14 %suba = sub i64 %amt, 1 @@ -135,7 +135,7 @@ define i64 @f10(i64 %a, i64 %amt) { ; Check the lowest value that can be subtracted from the shift amount. ; Again, we could mask the shift amount instead. define i64 @f11(i64 %a, i64 %amt) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: rllg %r2, %r2, -524288(%r3) ; CHECK: br %r14 %suba = sub i64 %amt, 524288 @@ -149,7 +149,7 @@ define i64 @f11(i64 %a, i64 %amt) { ; Check the next value down, which without masking must use a separate ; addition. define i64 @f12(i64 %a, i64 %amt) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: a{{g?}}fi %r3, -524289 ; CHECK: rllg %r2, %r2, 0(%r3) ; CHECK: br %r14 @@ -163,7 +163,7 @@ define i64 @f12(i64 %a, i64 %amt) { ; Check that we don't try to generate "indexed" shifts. define i64 @f13(i64 %a, i64 %b, i64 %c) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}} ; CHECK: rllg %r2, %r2, 0({{%r[34]}}) ; CHECK: br %r14 @@ -177,7 +177,7 @@ define i64 @f13(i64 %a, i64 %b, i64 %c) { ; Check that the shift amount uses an address register. It cannot be in %r0. define i64 @f14(i64 %a, i64 *%ptr) { -; CHECK: f14: +; CHECK-LABEL: f14: ; CHECK: l %r1, 4(%r3) ; CHECK: rllg %r2, %r2, 0(%r1) ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/shift-09.ll b/test/CodeGen/SystemZ/shift-09.ll new file mode 100644 index 0000000000000..c87cf0d9a1ee8 --- /dev/null +++ b/test/CodeGen/SystemZ/shift-09.ll @@ -0,0 +1,63 @@ +; Test three-operand shifts. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check that we use SLLK over SLL where useful. 
+define i32 @f1(i32 %a, i32 %b, i32 %amt) { +; CHECK-LABEL: f1: +; CHECK: sllk %r2, %r3, 15(%r4) +; CHECK: br %r14 + %add = add i32 %amt, 15 + %shift = shl i32 %b, %add + ret i32 %shift +} + +; Check that we use SLL over SLLK where possible. +define i32 @f2(i32 %a, i32 %amt) { +; CHECK-LABEL: f2: +; CHECK: sll %r2, 15(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 15 + %shift = shl i32 %a, %add + ret i32 %shift +} + +; Check that we use SRLK over SRL where useful. +define i32 @f3(i32 %a, i32 %b, i32 %amt) { +; CHECK-LABEL: f3: +; CHECK: srlk %r2, %r3, 15(%r4) +; CHECK: br %r14 + %add = add i32 %amt, 15 + %shift = lshr i32 %b, %add + ret i32 %shift +} + +; Check that we use SRL over SRLK where possible. +define i32 @f4(i32 %a, i32 %amt) { +; CHECK-LABEL: f4: +; CHECK: srl %r2, 15(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 15 + %shift = lshr i32 %a, %add + ret i32 %shift +} + +; Check that we use SRAK over SRA where useful. +define i32 @f5(i32 %a, i32 %b, i32 %amt) { +; CHECK-LABEL: f5: +; CHECK: srak %r2, %r3, 15(%r4) +; CHECK: br %r14 + %add = add i32 %amt, 15 + %shift = ashr i32 %b, %add + ret i32 %shift +} + +; Check that we use SRA over SRAK where possible. +define i32 @f6(i32 %a, i32 %amt) { +; CHECK-LABEL: f6: +; CHECK: sra %r2, 15(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 15 + %shift = ashr i32 %a, %add + ret i32 %shift +} diff --git a/test/CodeGen/SystemZ/shift-10.ll b/test/CodeGen/SystemZ/shift-10.ll new file mode 100644 index 0000000000000..46ed2180dfd4e --- /dev/null +++ b/test/CodeGen/SystemZ/shift-10.ll @@ -0,0 +1,78 @@ +; Test compound shifts. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test a shift right followed by a sign extension. This can use two shifts. 
+define i64 @f1(i32 %a) { +; CHECK-LABEL: f1: +; CHECK: sllg [[REG:%r[0-5]]], %r2, 62 +; CHECK: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %shr = lshr i32 %a, 1 + %trunc = trunc i32 %shr to i1 + %ext = sext i1 %trunc to i64 + ret i64 %ext +} + +; ...and again with the highest shift count. +define i64 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: sllg [[REG:%r[0-5]]], %r2, 32 +; CHECK: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %shr = lshr i32 %a, 31 + %trunc = trunc i32 %shr to i1 + %ext = sext i1 %trunc to i64 + ret i64 %ext +} + +; Test a left shift that of an extended right shift in a case where folding +; is possible. +define i64 @f3(i32 %a) { +; CHECK-LABEL: f3: +; CHECK: risbg %r2, %r2, 27, 181, 9 +; CHECK: br %r14 + %shr = lshr i32 %a, 1 + %ext = zext i32 %shr to i64 + %shl = shl i64 %ext, 10 + %and = and i64 %shl, 137438952960 + ret i64 %and +} + +; ...and again with a larger right shift. +define i64 @f4(i32 %a) { +; CHECK-LABEL: f4: +; CHECK: risbg %r2, %r2, 30, 158, 3 +; CHECK: br %r14 + %shr = lshr i32 %a, 30 + %ext = sext i32 %shr to i64 + %shl = shl i64 %ext, 33 + %and = and i64 %shl, 8589934592 + ret i64 %and +} + +; Repeat the previous test in a case where all bits outside the +; bottom 3 matter. +define i64 @f5(i32 %a) { +; CHECK-LABEL: f5: +; CHECK: risbg %r2, %r2, 29, 158, 3 +; CHECK: lhi %r2, 7 +; CHECK: br %r14 + %shr = lshr i32 %a, 30 + %ext = sext i32 %shr to i64 + %shl = shl i64 %ext, 33 + %or = or i64 %shl, 7 + ret i64 %or +} + +; Test that SRA gets replaced with SRL if the sign bit is the only one +; that matters. +define i64 @f6(i64 %a) { +; CHECK-LABEL: f6: +; CHECK: risbg %r2, %r2, 55, 183, 19 +; CHECK: br %r14 + %shl = shl i64 %a, 10 + %shr = ashr i64 %shl, 60 + %and = and i64 %shr, 256 + ret i64 %and +} diff --git a/test/CodeGen/SystemZ/spill-01.ll b/test/CodeGen/SystemZ/spill-01.ll new file mode 100644 index 0000000000000..ca64a88f2a0d6 --- /dev/null +++ b/test/CodeGen/SystemZ/spill-01.ll @@ -0,0 +1,548 @@ +; Test spilling using MVC. 
The tests here assume z10 register pressure, +; without the high words being available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare void @foo() + +@g0 = global i32 0 +@g1 = global i32 1 +@g2 = global i32 2 +@g3 = global i32 3 +@g4 = global i32 4 +@g5 = global i32 5 +@g6 = global i32 6 +@g7 = global i32 7 +@g8 = global i32 8 +@g9 = global i32 9 + +@h0 = global i64 0 +@h1 = global i64 1 +@h2 = global i64 2 +@h3 = global i64 3 +@h4 = global i64 4 +@h5 = global i64 5 +@h6 = global i64 6 +@h7 = global i64 7 +@h8 = global i64 8 +@h9 = global i64 9 + +; This function shouldn't spill anything +define void @f1(i32 *%ptr0) { +; CHECK-LABEL: f1: +; CHECK: stmg +; CHECK: aghi %r15, -160 +; CHECK-NOT: %r15 +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: lmg +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i32 2 + %ptr2 = getelementptr i32 *%ptr0, i32 4 + %ptr3 = getelementptr i32 *%ptr0, i32 6 + %ptr4 = getelementptr i32 *%ptr0, i32 8 + %ptr5 = getelementptr i32 *%ptr0, i32 10 + %ptr6 = getelementptr i32 *%ptr0, i32 12 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + + call void @foo() + + store i32 %val0, i32 *%ptr0 + store i32 %val1, i32 *%ptr1 + store i32 %val2, i32 *%ptr2 + store i32 %val3, i32 *%ptr3 + store i32 %val4, i32 *%ptr4 + store i32 %val5, i32 *%ptr5 + store i32 %val6, i32 *%ptr6 + + ret void +} + +; Test a case where at least one i32 load and at least one i32 store +; need spills. 
+define void @f2(i32 *%ptr0) { +; CHECK-LABEL: f2: +; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}}) +; CHECK: brasl %r14, foo@PLT +; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + + call void @foo() + + store i32 %val0, i32 *%ptr0 + store i32 %val1, i32 *%ptr1 + store i32 %val2, i32 *%ptr2 + store i32 %val3, i32 *%ptr3 + store i32 %val4, i32 *%ptr4 + store i32 %val5, i32 *%ptr5 + store i32 %val6, i32 *%ptr6 + store i32 %val7, i32 *%ptr7 + store i32 %val8, i32 *%ptr8 + + ret void +} + +; Test a case where at least one i64 load and at least one i64 store +; need spills. 
+define void @f3(i64 *%ptr0) { +; CHECK-LABEL: f3: +; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}}) +; CHECK: brasl %r14, foo@PLT +; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + + call void @foo() + + store i64 %val0, i64 *%ptr0 + store i64 %val1, i64 *%ptr1 + store i64 %val2, i64 *%ptr2 + store i64 %val3, i64 *%ptr3 + store i64 %val4, i64 *%ptr4 + store i64 %val5, i64 *%ptr5 + store i64 %val6, i64 *%ptr6 + store i64 %val7, i64 *%ptr7 + store i64 %val8, i64 *%ptr8 + + ret void +} + + +; Test a case where at least at least one f32 load and at least one f32 store +; need spills. The 8 call-saved FPRs could be used for 8 of the %vals +; (and are at the time of writing), but it would really be better to use +; MVC for all 10. 
+define void @f4(float *%ptr0) { +; CHECK-LABEL: f4: +; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}}) +; CHECK: brasl %r14, foo@PLT +; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float *%ptr0, i64 2 + %ptr2 = getelementptr float *%ptr0, i64 4 + %ptr3 = getelementptr float *%ptr0, i64 6 + %ptr4 = getelementptr float *%ptr0, i64 8 + %ptr5 = getelementptr float *%ptr0, i64 10 + %ptr6 = getelementptr float *%ptr0, i64 12 + %ptr7 = getelementptr float *%ptr0, i64 14 + %ptr8 = getelementptr float *%ptr0, i64 16 + %ptr9 = getelementptr float *%ptr0, i64 18 + + %val0 = load float *%ptr0 + %val1 = load float *%ptr1 + %val2 = load float *%ptr2 + %val3 = load float *%ptr3 + %val4 = load float *%ptr4 + %val5 = load float *%ptr5 + %val6 = load float *%ptr6 + %val7 = load float *%ptr7 + %val8 = load float *%ptr8 + %val9 = load float *%ptr9 + + call void @foo() + + store float %val0, float *%ptr0 + store float %val1, float *%ptr1 + store float %val2, float *%ptr2 + store float %val3, float *%ptr3 + store float %val4, float *%ptr4 + store float %val5, float *%ptr5 + store float %val6, float *%ptr6 + store float %val7, float *%ptr7 + store float %val8, float *%ptr8 + store float %val9, float *%ptr9 + + ret void +} + +; Similarly for f64. 
+define void @f5(double *%ptr0) { +; CHECK-LABEL: f5: +; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}}) +; CHECK: brasl %r14, foo@PLT +; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 + %ptr3 = getelementptr double *%ptr0, i64 6 + %ptr4 = getelementptr double *%ptr0, i64 8 + %ptr5 = getelementptr double *%ptr0, i64 10 + %ptr6 = getelementptr double *%ptr0, i64 12 + %ptr7 = getelementptr double *%ptr0, i64 14 + %ptr8 = getelementptr double *%ptr0, i64 16 + %ptr9 = getelementptr double *%ptr0, i64 18 + + %val0 = load double *%ptr0 + %val1 = load double *%ptr1 + %val2 = load double *%ptr2 + %val3 = load double *%ptr3 + %val4 = load double *%ptr4 + %val5 = load double *%ptr5 + %val6 = load double *%ptr6 + %val7 = load double *%ptr7 + %val8 = load double *%ptr8 + %val9 = load double *%ptr9 + + call void @foo() + + store double %val0, double *%ptr0 + store double %val1, double *%ptr1 + store double %val2, double *%ptr2 + store double %val3, double *%ptr3 + store double %val4, double *%ptr4 + store double %val5, double *%ptr5 + store double %val6, double *%ptr6 + store double %val7, double *%ptr7 + store double %val8, double *%ptr8 + store double %val9, double *%ptr9 + + ret void +} + +; Repeat f2 with atomic accesses. We shouldn't use MVC here. 
+define void @f6(i32 *%ptr0) { +; CHECK-LABEL: f6: +; CHECK-NOT: mvc +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + + %val0 = load atomic i32 *%ptr0 unordered, align 4 + %val1 = load atomic i32 *%ptr1 unordered, align 4 + %val2 = load atomic i32 *%ptr2 unordered, align 4 + %val3 = load atomic i32 *%ptr3 unordered, align 4 + %val4 = load atomic i32 *%ptr4 unordered, align 4 + %val5 = load atomic i32 *%ptr5 unordered, align 4 + %val6 = load atomic i32 *%ptr6 unordered, align 4 + %val7 = load atomic i32 *%ptr7 unordered, align 4 + %val8 = load atomic i32 *%ptr8 unordered, align 4 + + call void @foo() + + store atomic i32 %val0, i32 *%ptr0 unordered, align 4 + store atomic i32 %val1, i32 *%ptr1 unordered, align 4 + store atomic i32 %val2, i32 *%ptr2 unordered, align 4 + store atomic i32 %val3, i32 *%ptr3 unordered, align 4 + store atomic i32 %val4, i32 *%ptr4 unordered, align 4 + store atomic i32 %val5, i32 *%ptr5 unordered, align 4 + store atomic i32 %val6, i32 *%ptr6 unordered, align 4 + store atomic i32 %val7, i32 *%ptr7 unordered, align 4 + store atomic i32 %val8, i32 *%ptr8 unordered, align 4 + + ret void +} + +; ...likewise volatile accesses. 
+define void @f7(i32 *%ptr0) { +; CHECK-LABEL: f7: +; CHECK-NOT: mvc +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + + %val0 = load volatile i32 *%ptr0 + %val1 = load volatile i32 *%ptr1 + %val2 = load volatile i32 *%ptr2 + %val3 = load volatile i32 *%ptr3 + %val4 = load volatile i32 *%ptr4 + %val5 = load volatile i32 *%ptr5 + %val6 = load volatile i32 *%ptr6 + %val7 = load volatile i32 *%ptr7 + %val8 = load volatile i32 *%ptr8 + + call void @foo() + + store volatile i32 %val0, i32 *%ptr0 + store volatile i32 %val1, i32 *%ptr1 + store volatile i32 %val2, i32 *%ptr2 + store volatile i32 %val3, i32 *%ptr3 + store volatile i32 %val4, i32 *%ptr4 + store volatile i32 %val5, i32 *%ptr5 + store volatile i32 %val6, i32 *%ptr6 + store volatile i32 %val7, i32 *%ptr7 + store volatile i32 %val8, i32 *%ptr8 + + ret void +} + +; Check that LRL and STRL are not converted. +define void @f8() { +; CHECK-LABEL: f8: +; CHECK-NOT: mvc +; CHECK: br %r14 + %val0 = load i32 *@g0 + %val1 = load i32 *@g1 + %val2 = load i32 *@g2 + %val3 = load i32 *@g3 + %val4 = load i32 *@g4 + %val5 = load i32 *@g5 + %val6 = load i32 *@g6 + %val7 = load i32 *@g7 + %val8 = load i32 *@g8 + %val9 = load i32 *@g9 + + call void @foo() + + store i32 %val0, i32 *@g0 + store i32 %val1, i32 *@g1 + store i32 %val2, i32 *@g2 + store i32 %val3, i32 *@g3 + store i32 %val4, i32 *@g4 + store i32 %val5, i32 *@g5 + store i32 %val6, i32 *@g6 + store i32 %val7, i32 *@g7 + store i32 %val8, i32 *@g8 + store i32 %val9, i32 *@g9 + + ret void +} + +; Likewise LGRL and STGRL. 
+define void @f9() { +; CHECK-LABEL: f9: +; CHECK-NOT: mvc +; CHECK: br %r14 + %val0 = load i64 *@h0 + %val1 = load i64 *@h1 + %val2 = load i64 *@h2 + %val3 = load i64 *@h3 + %val4 = load i64 *@h4 + %val5 = load i64 *@h5 + %val6 = load i64 *@h6 + %val7 = load i64 *@h7 + %val8 = load i64 *@h8 + %val9 = load i64 *@h9 + + call void @foo() + + store i64 %val0, i64 *@h0 + store i64 %val1, i64 *@h1 + store i64 %val2, i64 *@h2 + store i64 %val3, i64 *@h3 + store i64 %val4, i64 *@h4 + store i64 %val5, i64 *@h5 + store i64 %val6, i64 *@h6 + store i64 %val7, i64 *@h7 + store i64 %val8, i64 *@h8 + store i64 %val9, i64 *@h9 + + ret void +} + +; This showed a problem with the way stack coloring updated instructions. +; The copy from %val9 to %newval8 can be done using an MVC, which then +; has two frame index operands. Stack coloring chose a valid renumbering +; [FI0, FI1] -> [FI1, FI2], but applied it in the form FI0 -> FI1 -> FI2, +; so that both operands ended up being the same. +define void @f10() { +; CHECK-LABEL: f10: +; CHECK: lgrl [[REG:%r[0-9]+]], h9 +; CHECK: stg [[REG]], [[VAL9:[0-9]+]](%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: brasl %r14, foo@PLT +; CHECK: mvc [[NEWVAL8:[0-9]+]](8,%r15), [[VAL9]](%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: lg [[REG:%r[0-9]+]], [[NEWVAL8]](%r15) +; CHECK: stgrl [[REG]], h8 +; CHECK: br %r14 +entry: + %val0 = load volatile i64 *@h0 + %val1 = load volatile i64 *@h1 + %val2 = load volatile i64 *@h2 + %val3 = load volatile i64 *@h3 + %val4 = load volatile i64 *@h4 + %val5 = load volatile i64 *@h5 + %val6 = load volatile i64 *@h6 + %val7 = load volatile i64 *@h7 + %val8 = load volatile i64 *@h8 + %val9 = load volatile i64 *@h9 + + call void @foo() + + store volatile i64 %val0, i64 *@h0 + store volatile i64 %val1, i64 *@h1 + store volatile i64 %val2, i64 *@h2 + store volatile i64 %val3, i64 *@h3 + store volatile i64 %val4, i64 *@h4 + store volatile i64 %val5, i64 *@h5 + store volatile i64 %val6, i64 *@h6 + store volatile i64 %val7, 
i64 *@h7 + + %check = load volatile i64 *@h0 + %cond = icmp eq i64 %check, 0 + br i1 %cond, label %skip, label %fallthru + +fallthru: + call void @foo() + + store volatile i64 %val0, i64 *@h0 + store volatile i64 %val1, i64 *@h1 + store volatile i64 %val2, i64 *@h2 + store volatile i64 %val3, i64 *@h3 + store volatile i64 %val4, i64 *@h4 + store volatile i64 %val5, i64 *@h5 + store volatile i64 %val6, i64 *@h6 + store volatile i64 %val7, i64 *@h7 + store volatile i64 %val8, i64 *@h8 + br label %skip + +skip: + %newval8 = phi i64 [ %val8, %entry ], [ %val9, %fallthru ] + call void @foo() + + store volatile i64 %val0, i64 *@h0 + store volatile i64 %val1, i64 *@h1 + store volatile i64 %val2, i64 *@h2 + store volatile i64 %val3, i64 *@h3 + store volatile i64 %val4, i64 *@h4 + store volatile i64 %val5, i64 *@h5 + store volatile i64 %val6, i64 *@h6 + store volatile i64 %val7, i64 *@h7 + store volatile i64 %newval8, i64 *@h8 + store volatile i64 %val9, i64 *@h9 + + ret void +} + +; This used to generate a no-op MVC. It is very sensitive to spill heuristics. 
+define void @f11() { +; CHECK-LABEL: f11: +; CHECK-NOT: mvc [[OFFSET:[0-9]+]](8,%r15), [[OFFSET]](%r15) +; CHECK: br %r14 +entry: + %val0 = load volatile i64 *@h0 + %val1 = load volatile i64 *@h1 + %val2 = load volatile i64 *@h2 + %val3 = load volatile i64 *@h3 + %val4 = load volatile i64 *@h4 + %val5 = load volatile i64 *@h5 + %val6 = load volatile i64 *@h6 + %val7 = load volatile i64 *@h7 + + %altval0 = load volatile i64 *@h0 + %altval1 = load volatile i64 *@h1 + + call void @foo() + + store volatile i64 %val0, i64 *@h0 + store volatile i64 %val1, i64 *@h1 + store volatile i64 %val2, i64 *@h2 + store volatile i64 %val3, i64 *@h3 + store volatile i64 %val4, i64 *@h4 + store volatile i64 %val5, i64 *@h5 + store volatile i64 %val6, i64 *@h6 + store volatile i64 %val7, i64 *@h7 + + %check = load volatile i64 *@h0 + %cond = icmp eq i64 %check, 0 + br i1 %cond, label %a1, label %b1 + +a1: + call void @foo() + br label %join1 + +b1: + call void @foo() + br label %join1 + +join1: + %newval0 = phi i64 [ %val0, %a1 ], [ %altval0, %b1 ] + + call void @foo() + + store volatile i64 %val1, i64 *@h1 + store volatile i64 %val2, i64 *@h2 + store volatile i64 %val3, i64 *@h3 + store volatile i64 %val4, i64 *@h4 + store volatile i64 %val5, i64 *@h5 + store volatile i64 %val6, i64 *@h6 + store volatile i64 %val7, i64 *@h7 + br i1 %cond, label %a2, label %b2 + +a2: + call void @foo() + br label %join2 + +b2: + call void @foo() + br label %join2 + +join2: + %newval1 = phi i64 [ %val1, %a2 ], [ %altval1, %b2 ] + + call void @foo() + + store volatile i64 %val2, i64 *@h2 + store volatile i64 %val3, i64 *@h3 + store volatile i64 %val4, i64 *@h4 + store volatile i64 %val5, i64 *@h5 + store volatile i64 %val6, i64 *@h6 + store volatile i64 %val7, i64 *@h7 + + call void @foo() + + store volatile i64 %newval0, i64 *@h0 + store volatile i64 %newval1, i64 *@h1 + store volatile i64 %val2, i64 *@h2 + store volatile i64 %val3, i64 *@h3 + store volatile i64 %val4, i64 *@h4 + store volatile i64 
%val5, i64 *@h5 + store volatile i64 %val6, i64 *@h6 + store volatile i64 %val7, i64 *@h7 + + ret void +} diff --git a/test/CodeGen/SystemZ/strcmp-01.ll b/test/CodeGen/SystemZ/strcmp-01.ll new file mode 100644 index 0000000000000..122c160babaf4 --- /dev/null +++ b/test/CodeGen/SystemZ/strcmp-01.ll @@ -0,0 +1,70 @@ +; Test strcmp using CLST, i32 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare signext i32 @strcmp(i8 *%src1, i8 *%src2) + +; Check a case where the result is used as an integer. +define i32 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll %r2, [[REG]], 31 +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) + ret i32 %res +} + +; Check a case where the result is tested for equality. +define void @f2(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: je {{\.L.*}} +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Test a case where the result is used both as an integer and for +; branching. 
+define i32 @f3(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll %r2, [[REG]], 31 +; CHECK: jl {{\.L*}} +; CHECK: br %r14 +entry: + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/strcmp-02.ll b/test/CodeGen/SystemZ/strcmp-02.ll new file mode 100644 index 0000000000000..27bd00b47fd38 --- /dev/null +++ b/test/CodeGen/SystemZ/strcmp-02.ll @@ -0,0 +1,72 @@ +; Test strcmp using CLST, i64 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @strcmp(i8 *%src1, i8 *%src2) + +; Check a case where the result is used as an integer. +define i64 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: br %r14 + %res = call i64 @strcmp(i8 *%src1, i8 *%src2) + ret i64 %res +} + +; Check a case where the result is tested for equality. +define void @f2(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f2: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: je {{\.L.*}} +; CHECK: br %r14 + %res = call i64 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp eq i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Test a case where the result is used both as an integer and for +; branching. 
+define i64 @f3(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f3: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: jl {{\.L*}} +; CHECK: br %r14 +entry: + %res = call i64 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/strcpy-01.ll b/test/CodeGen/SystemZ/strcpy-01.ll new file mode 100644 index 0000000000000..29bab629ecf85 --- /dev/null +++ b/test/CodeGen/SystemZ/strcpy-01.ll @@ -0,0 +1,50 @@ +; Test strcpy using MVST. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@strcpy(i8 *%dest, i8 *%src) +declare i8 *@stpcpy(i8 *%dest, i8 *%src) + +; Check strcpy. +define i8 *@f1(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r2 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: mvst [[REG]], %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NOT: %r2 +; CHECK: br %r14 + %res = call i8 *@strcpy(i8 *%dest, i8 *%src) + ret i8 *%res +} + +; Check stpcpy. +define i8 *@f2(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: mvst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NOT: %r2 +; CHECK: br %r14 + %res = call i8 *@stpcpy(i8 *%dest, i8 *%src) + ret i8 *%res +} + +; Check correct operation with other loads and stores. The load must +; come before the loop and the store afterwards. 
+define i32 @f3(i32 %dummy, i8 *%dest, i8 *%src, i32 *%resptr, i32 *%storeptr) { +; CHECK-LABEL: f3: +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: l %r2, 0(%r5) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: mvst %r3, %r4 +; CHECK-NEXT: jo [[LABEL]] +; CHECK: mvhi 0(%r6), 0 +; CHECK: br %r14 + %res = load i32 *%resptr + %unused = call i8 *@strcpy(i8 *%dest, i8 *%src) + store i32 0, i32 *%storeptr + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/strlen-01.ll b/test/CodeGen/SystemZ/strlen-01.ll new file mode 100644 index 0000000000000..16161d4d2c82e --- /dev/null +++ b/test/CodeGen/SystemZ/strlen-01.ll @@ -0,0 +1,39 @@ +; Test strlen using SRST, i64 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @strlen(i8 *%src) +declare i64 @strnlen(i8 *%src, i64 %len) + +; Test strlen with its proper i64 prototype. It would also be valid for +; the uses of %r3 and REG after the LGR to be swapped. +define i64 @f1(i32 %dummy, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: sgr %r2, %r3 +; CHECK: br %r14 + %res = call i64 @strlen(i8 *%src) + ret i64 %res +} + +; Test strnlen with its proper i64 prototype. +define i64 @f2(i64 %len, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK-DAG: agr %r2, %r3 +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: sgr %r2, %r3 +; CHECK: br %r14 + %res = call i64 @strnlen(i8 *%src, i64 %len) + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/strlen-02.ll b/test/CodeGen/SystemZ/strlen-02.ll new file mode 100644 index 0000000000000..e1abbff4b4e02 --- /dev/null +++ b/test/CodeGen/SystemZ/strlen-02.ll @@ -0,0 +1,39 @@ +; Test strlen using SRST, i32 version. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @strlen(i8 *%src) +declare i32 @strnlen(i8 *%src, i32 %len) + +; Test strlen with an i32-based prototype. It would also be valid for +; the uses of %r3 and REG after the LGR to be swapped. +define i32 @f1(i32 %dummy, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: sgr %r2, %r3 +; CHECK: br %r14 + %res = call i32 @strlen(i8 *%src) + ret i32 %res +} + +; Test strnlen with an i32-based prototype. +define i32 @f2(i32 zeroext %len, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK-DAG: agr %r2, %r3 +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: sgr %r2, %r3 +; CHECK: br %r14 + %res = call i32 @strnlen(i8 *%src, i32 %len) + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/tls-01.ll b/test/CodeGen/SystemZ/tls-01.ll index 49037ad51c69c..16bc8f6e500f2 100644 --- a/test/CodeGen/SystemZ/tls-01.ll +++ b/test/CodeGen/SystemZ/tls-01.ll @@ -11,7 +11,7 @@ define i32 *@foo() { ; CHECK-CP: .LCP{{.*}}: ; CHECK-CP: .quad x@NTPOFF ; -; CHECK-MAIN: foo: +; CHECK-MAIN-LABEL: foo: ; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 ; CHECK-MAIN: sllg %r2, [[HIGH]], 32 ; CHECK-MAIN: ear %r2, %a1 diff --git a/test/CodeGen/SystemZ/unaligned-01.ll b/test/CodeGen/SystemZ/unaligned-01.ll new file mode 100644 index 0000000000000..526a068100efe --- /dev/null +++ b/test/CodeGen/SystemZ/unaligned-01.ll @@ -0,0 +1,62 @@ +; Check that unaligned accesses are allowed in general. We check the +; few exceptions (like CRL) in their respective test files. +; +; FIXME: -combiner-alias-analysis (the default for SystemZ) stops +; f1 from being optimized. 
+; RUN: llc < %s -mtriple=s390x-linux-gnu -combiner-alias-analysis=false \ +; RUN: | FileCheck %s + +; Check that these four byte stores become a single word store. +define void @f1(i8 *%ptr) { +; CHECK: f1 +; CHECK: iilf [[REG:%r[0-5]]], 66051 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %off1 = getelementptr i8 *%ptr, i64 1 + %off2 = getelementptr i8 *%ptr, i64 2 + %off3 = getelementptr i8 *%ptr, i64 3 + store i8 0, i8 *%ptr + store i8 1, i8 *%off1 + store i8 2, i8 *%off2 + store i8 3, i8 *%off3 + ret void +} + +; Check that unaligned 2-byte accesses are allowed. +define i16 @f2(i16 *%src, i16 *%dst) { +; CHECK-LABEL: f2: +; CHECK: lh %r2, 0(%r2) +; CHECK: sth %r2, 0(%r3) +; CHECK: br %r14 + %val = load i16 *%src, align 1 + store i16 %val, i16 *%dst, align 1 + ret i16 %val +} + +; Check that unaligned 4-byte accesses are allowed. +define i32 @f3(i32 *%src1, i32 *%src2, i32 *%dst) { +; CHECK-LABEL: f3: +; CHECK: l %r2, 0(%r2) +; CHECK: s %r2, 0(%r3) +; CHECK: st %r2, 0(%r4) +; CHECK: br %r14 + %val1 = load i32 *%src1, align 1 + %val2 = load i32 *%src2, align 2 + %sub = sub i32 %val1, %val2 + store i32 %sub, i32 *%dst, align 1 + ret i32 %sub +} + +; Check that unaligned 8-byte accesses are allowed. +define i64 @f4(i64 *%src1, i64 *%src2, i64 *%dst) { +; CHECK-LABEL: f4: +; CHECK: lg %r2, 0(%r2) +; CHECK: sg %r2, 0(%r3) +; CHECK: stg %r2, 0(%r4) +; CHECK: br %r14 + %val1 = load i64 *%src1, align 1 + %val2 = load i64 *%src2, align 2 + %sub = sub i64 %val1, %val2 + store i64 %sub, i64 *%dst, align 4 + ret i64 %sub +} diff --git a/test/CodeGen/SystemZ/xor-01.ll b/test/CodeGen/SystemZ/xor-01.ll index 30bdbe7901f9c..185d6bb0a7541 100644 --- a/test/CodeGen/SystemZ/xor-01.ll +++ b/test/CodeGen/SystemZ/xor-01.ll @@ -1,10 +1,13 @@ ; Test 32-bit XORs in which the second operand is variable. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() ; Check XR. define i32 @f1(i32 %a, i32 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: xr %r2, %r3 ; CHECK: br %r14 %xor = xor i32 %a, %b @@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) { ; Check the low end of the X range. define i32 @f2(i32 %a, i32 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: x %r2, 0(%r3) ; CHECK: br %r14 %b = load i32 *%src @@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) { ; Check the high end of the aligned X range. define i32 @f3(i32 %a, i32 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: x %r2, 4092(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1023 @@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) { ; Check the next word up, which should use XY instead of X. define i32 @f4(i32 %a, i32 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: xy %r2, 4096(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 1024 @@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) { ; Check the high end of the aligned XY range. define i32 @f5(i32 %a, i32 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: xy %r2, 524284(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 131071 @@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) { ; Check the next word up, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f6(i32 %a, i32 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: x %r2, 0(%r3) ; CHECK: br %r14 @@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) { ; Check the high end of the negative aligned XY range. 
define i32 @f7(i32 %a, i32 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: xy %r2, -4(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -1 @@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) { ; Check the low end of the XY range. define i32 @f8(i32 %a, i32 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: xy %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i32 *%src, i64 -131072 @@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) { ; Check the next word down, which needs separate address logic. ; Other sequences besides this one would be OK. define i32 @f9(i32 %a, i32 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: x %r2, 0(%r3) ; CHECK: br %r14 @@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) { ; Check that X allows an index. define i32 @f10(i32 %a, i64 %src, i64 %index) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: x %r2, 4092({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) { ; Check that XY allows an index. define i32 @f11(i32 %a, i64 %src, i64 %index) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: xy %r2, 4096({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) { %xor = xor i32 %a, %b ret i32 %xor } + +; Check that XORs of spilled values can use X rather than XR. 
+define i32 @f12(i32 *%ptr0) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK: x %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32 *%ptr0, i64 18 + + %val0 = load i32 *%ptr0 + %val1 = load i32 *%ptr1 + %val2 = load i32 *%ptr2 + %val3 = load i32 *%ptr3 + %val4 = load i32 *%ptr4 + %val5 = load i32 *%ptr5 + %val6 = load i32 *%ptr6 + %val7 = load i32 *%ptr7 + %val8 = load i32 *%ptr8 + %val9 = load i32 *%ptr9 + + %ret = call i32 @foo() + + %xor0 = xor i32 %ret, %val0 + %xor1 = xor i32 %xor0, %val1 + %xor2 = xor i32 %xor1, %val2 + %xor3 = xor i32 %xor2, %val3 + %xor4 = xor i32 %xor3, %val4 + %xor5 = xor i32 %xor4, %val5 + %xor6 = xor i32 %xor5, %val6 + %xor7 = xor i32 %xor6, %val7 + %xor8 = xor i32 %xor7, %val8 + %xor9 = xor i32 %xor8, %val9 + + ret i32 %xor9 +} diff --git a/test/CodeGen/SystemZ/xor-02.ll b/test/CodeGen/SystemZ/xor-02.ll index c2b52b9b8e20b..7e28e231cfcd3 100644 --- a/test/CodeGen/SystemZ/xor-02.ll +++ b/test/CodeGen/SystemZ/xor-02.ll @@ -4,7 +4,7 @@ ; Check the lowest useful XILF value. define i32 @f1(i32 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: xilf %r2, 1 ; CHECK: br %r14 %xor = xor i32 %a, 1 @@ -13,7 +13,7 @@ define i32 @f1(i32 %a) { ; Check the high end of the signed range. define i32 @f2(i32 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: xilf %r2, 2147483647 ; CHECK: br %r14 %xor = xor i32 %a, 2147483647 @@ -23,7 +23,7 @@ define i32 @f2(i32 %a) { ; Check the low end of the signed range, which should be treated ; as a positive value. 
define i32 @f3(i32 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: xilf %r2, 2147483648 ; CHECK: br %r14 %xor = xor i32 %a, -2147483648 @@ -32,7 +32,7 @@ define i32 @f3(i32 %a) { ; Check the high end of the XILF range. define i32 @f4(i32 %a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: xilf %r2, 4294967295 ; CHECK: br %r14 %xor = xor i32 %a, 4294967295 diff --git a/test/CodeGen/SystemZ/xor-03.ll b/test/CodeGen/SystemZ/xor-03.ll index a4851b33090d8..ab7f2584b60d7 100644 --- a/test/CodeGen/SystemZ/xor-03.ll +++ b/test/CodeGen/SystemZ/xor-03.ll @@ -1,10 +1,13 @@ ; Test 64-bit XORs in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo() ; Check XGR. define i64 @f1(i64 %a, i64 %b) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: xgr %r2, %r3 ; CHECK: br %r14 %xor = xor i64 %a, %b @@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) { ; Check XG with no displacement. define i64 @f2(i64 %a, i64 *%src) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: xg %r2, 0(%r3) ; CHECK: br %r14 %b = load i64 *%src @@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) { ; Check the high end of the aligned XG range. define i64 @f3(i64 %a, i64 *%src) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: xg %r2, 524280(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 65535 @@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) { ; Check the next doubleword up, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f4(i64 %a, i64 *%src) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: agfi %r3, 524288 ; CHECK: xg %r2, 0(%r3) ; CHECK: br %r14 @@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) { ; Check the high end of the negative aligned XG range. 
define i64 @f5(i64 %a, i64 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: xg %r2, -8(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -1 @@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) { ; Check the low end of the XG range. define i64 @f6(i64 %a, i64 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: xg %r2, -524288(%r3) ; CHECK: br %r14 %ptr = getelementptr i64 *%src, i64 -65536 @@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) { ; Check the next doubleword down, which needs separate address logic. ; Other sequences besides this one would be OK. define i64 @f7(i64 %a, i64 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: agfi %r3, -524296 ; CHECK: xg %r2, 0(%r3) ; CHECK: br %r14 @@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) { ; Check that XG allows an index. define i64 @f8(i64 %a, i64 %src, i64 %index) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: xg %r2, 524280({{%r4,%r3|%r3,%r4}}) ; CHECK: br %r14 %add1 = add i64 %src, %index @@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) { %xor = xor i64 %a, %b ret i64 %xor } + +; Check that XORs of spilled values can use XG rather than XGR. 
+define i64 @f9(i64 *%ptr0) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, foo@PLT +; CHECK: xg %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64 *%ptr0, i64 18 + + %val0 = load i64 *%ptr0 + %val1 = load i64 *%ptr1 + %val2 = load i64 *%ptr2 + %val3 = load i64 *%ptr3 + %val4 = load i64 *%ptr4 + %val5 = load i64 *%ptr5 + %val6 = load i64 *%ptr6 + %val7 = load i64 *%ptr7 + %val8 = load i64 *%ptr8 + %val9 = load i64 *%ptr9 + + %ret = call i64 @foo() + + %xor0 = xor i64 %ret, %val0 + %xor1 = xor i64 %xor0, %val1 + %xor2 = xor i64 %xor1, %val2 + %xor3 = xor i64 %xor2, %val3 + %xor4 = xor i64 %xor3, %val4 + %xor5 = xor i64 %xor4, %val5 + %xor6 = xor i64 %xor5, %val6 + %xor7 = xor i64 %xor6, %val7 + %xor8 = xor i64 %xor7, %val8 + %xor9 = xor i64 %xor8, %val9 + + ret i64 %xor9 +} diff --git a/test/CodeGen/SystemZ/xor-04.ll b/test/CodeGen/SystemZ/xor-04.ll index cc141d391a85e..44f0a4cc39d0c 100644 --- a/test/CodeGen/SystemZ/xor-04.ll +++ b/test/CodeGen/SystemZ/xor-04.ll @@ -4,7 +4,7 @@ ; Check the lowest useful XILF value. define i64 @f1(i64 %a) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: xilf %r2, 1 ; CHECK: br %r14 %xor = xor i64 %a, 1 @@ -13,7 +13,7 @@ define i64 @f1(i64 %a) { ; Check the high end of the XILF range. define i64 @f2(i64 %a) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: xilf %r2, 4294967295 ; CHECK: br %r14 %xor = xor i64 %a, 4294967295 @@ -22,7 +22,7 @@ define i64 @f2(i64 %a) { ; Check the lowest useful XIHF value, which is one up from the above. 
define i64 @f3(i64 %a) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: xihf %r2, 1 ; CHECK: br %r14 %xor = xor i64 %a, 4294967296 @@ -31,7 +31,7 @@ define i64 @f3(i64 %a) { ; Check the next value up again, which needs a combination of XIHF and XILF. define i64 @f4(i64 %a) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: xihf %r2, 1 ; CHECK: xilf %r2, 4294967295 ; CHECK: br %r14 @@ -41,7 +41,7 @@ define i64 @f4(i64 %a) { ; Check the high end of the XIHF range. define i64 @f5(i64 %a) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: xihf %r2, 4294967295 ; CHECK: br %r14 %xor = xor i64 %a, -4294967296 @@ -50,7 +50,7 @@ define i64 @f5(i64 %a) { ; Check the next value up, which again must use XIHF and XILF. define i64 @f6(i64 %a) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: xihf %r2, 4294967295 ; CHECK: xilf %r2, 1 ; CHECK: br %r14 @@ -60,7 +60,7 @@ define i64 @f6(i64 %a) { ; Check full bitwise negation define i64 @f7(i64 %a) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: xihf %r2, 4294967295 ; CHECK: xilf %r2, 4294967295 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/xor-05.ll b/test/CodeGen/SystemZ/xor-05.ll index 9ef0d20ca52b7..fbd5660ad058d 100644 --- a/test/CodeGen/SystemZ/xor-05.ll +++ b/test/CodeGen/SystemZ/xor-05.ll @@ -4,7 +4,7 @@ ; Check the lowest useful constant, expressed as a signed integer. define void @f1(i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: xi 0(%r2), 1 ; CHECK: br %r14 %val = load i8 *%ptr @@ -15,7 +15,7 @@ define void @f1(i8 *%ptr) { ; Check the highest useful constant, expressed as a signed integer. define void @f2(i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -26,7 +26,7 @@ define void @f2(i8 *%ptr) { ; Check the lowest useful constant, expressed as an unsigned integer. 
define void @f3(i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: xi 0(%r2), 1 ; CHECK: br %r14 %val = load i8 *%ptr @@ -37,7 +37,7 @@ define void @f3(i8 *%ptr) { ; Check the highest useful constant, expressed as an unsigned integer. define void @f4(i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -48,7 +48,7 @@ define void @f4(i8 *%ptr) { ; Check the high end of the XI range. define void @f5(i8 *%src) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: xi 4095(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4095 @@ -60,7 +60,7 @@ define void @f5(i8 *%src) { ; Check the next byte up, which should use XIY instead of XI. define void @f6(i8 *%src) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: xiy 4096(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 4096 @@ -72,7 +72,7 @@ define void @f6(i8 *%src) { ; Check the high end of the XIY range. define void @f7(i8 *%src) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: xiy 524287(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 524287 @@ -85,7 +85,7 @@ define void @f7(i8 *%src) { ; Check the next byte up, which needs separate address logic. ; Other sequences besides this one would be OK. define void @f8(i8 *%src) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: agfi %r2, 524288 ; CHECK: xi 0(%r2), 127 ; CHECK: br %r14 @@ -98,7 +98,7 @@ define void @f8(i8 *%src) { ; Check the high end of the negative XIY range. define void @f9(i8 *%src) { -; CHECK: f9: +; CHECK-LABEL: f9: ; CHECK: xiy -1(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -1 @@ -110,7 +110,7 @@ define void @f9(i8 *%src) { ; Check the low end of the XIY range. define void @f10(i8 *%src) { -; CHECK: f10: +; CHECK-LABEL: f10: ; CHECK: xiy -524288(%r2), 127 ; CHECK: br %r14 %ptr = getelementptr i8 *%src, i64 -524288 @@ -123,7 +123,7 @@ define void @f10(i8 *%src) { ; Check the next byte down, which needs separate address logic. 
; Other sequences besides this one would be OK. define void @f11(i8 *%src) { -; CHECK: f11: +; CHECK-LABEL: f11: ; CHECK: agfi %r2, -524289 ; CHECK: xi 0(%r2), 127 ; CHECK: br %r14 @@ -136,7 +136,7 @@ define void @f11(i8 *%src) { ; Check that XI does not allow an index define void @f12(i64 %src, i64 %index) { -; CHECK: f12: +; CHECK-LABEL: f12: ; CHECK: agr %r2, %r3 ; CHECK: xi 4095(%r2), 127 ; CHECK: br %r14 @@ -151,7 +151,7 @@ define void @f12(i64 %src, i64 %index) { ; Check that XIY does not allow an index define void @f13(i64 %src, i64 %index) { -; CHECK: f13: +; CHECK-LABEL: f13: ; CHECK: agr %r2, %r3 ; CHECK: xiy 4096(%r2), 127 ; CHECK: br %r14 diff --git a/test/CodeGen/SystemZ/xor-06.ll b/test/CodeGen/SystemZ/xor-06.ll index 0ffff47c2b5a2..f39c0fec4e402 100644 --- a/test/CodeGen/SystemZ/xor-06.ll +++ b/test/CodeGen/SystemZ/xor-06.ll @@ -5,7 +5,7 @@ ; Zero extension to 32 bits, negative constant. define void @f1(i8 *%ptr) { -; CHECK: f1: +; CHECK-LABEL: f1: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -18,7 +18,7 @@ define void @f1(i8 *%ptr) { ; Zero extension to 64 bits, negative constant. define void @f2(i8 *%ptr) { -; CHECK: f2: +; CHECK-LABEL: f2: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -31,7 +31,7 @@ define void @f2(i8 *%ptr) { ; Zero extension to 32 bits, positive constant. define void @f3(i8 *%ptr) { -; CHECK: f3: +; CHECK-LABEL: f3: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -44,7 +44,7 @@ define void @f3(i8 *%ptr) { ; Zero extension to 64 bits, positive constant. define void @f4(i8 *%ptr) { -; CHECK: f4: +; CHECK-LABEL: f4: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -57,7 +57,7 @@ define void @f4(i8 *%ptr) { ; Sign extension to 32 bits, negative constant. 
define void @f5(i8 *%ptr) { -; CHECK: f5: +; CHECK-LABEL: f5: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -70,7 +70,7 @@ define void @f5(i8 *%ptr) { ; Sign extension to 64 bits, negative constant. define void @f6(i8 *%ptr) { -; CHECK: f6: +; CHECK-LABEL: f6: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -83,7 +83,7 @@ define void @f6(i8 *%ptr) { ; Sign extension to 32 bits, positive constant. define void @f7(i8 *%ptr) { -; CHECK: f7: +; CHECK-LABEL: f7: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr @@ -96,7 +96,7 @@ define void @f7(i8 *%ptr) { ; Sign extension to 64 bits, positive constant. define void @f8(i8 *%ptr) { -; CHECK: f8: +; CHECK-LABEL: f8: ; CHECK: xi 0(%r2), 254 ; CHECK: br %r14 %val = load i8 *%ptr diff --git a/test/CodeGen/SystemZ/xor-07.ll b/test/CodeGen/SystemZ/xor-07.ll new file mode 100644 index 0000000000000..ec2a0385b1611 --- /dev/null +++ b/test/CodeGen/SystemZ/xor-07.ll @@ -0,0 +1,39 @@ +; Test the three-operand forms of XOR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check XRK. +define i32 @f1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f1: +; CHECK: xrk %r2, %r3, %r4 +; CHECK: br %r14 + %xor = xor i32 %b, %c + ret i32 %xor +} + +; Check that we can still use XR in obvious cases. +define i32 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: xr %r2, %r3 +; CHECK: br %r14 + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check XGRK. +define i64 @f3(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: f3: +; CHECK: xgrk %r2, %r3, %r4 +; CHECK: br %r14 + %xor = xor i64 %b, %c + ret i64 %xor +} + +; Check that we can still use XGR in obvious cases. 
+define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: xgr %r2, %r3 +; CHECK: br %r14 + %xor = xor i64 %a, %b + ret i64 %xor +} diff --git a/test/CodeGen/SystemZ/xor-08.ll b/test/CodeGen/SystemZ/xor-08.ll new file mode 100644 index 0000000000000..8cba41e742ce0 --- /dev/null +++ b/test/CodeGen/SystemZ/xor-08.ll @@ -0,0 +1,57 @@ +; Test memory-to-memory XORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the simple i8 case. +define void @f1(i8 *%ptr1) { +; CHECK-LABEL: f1: +; CHECK: xc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %old = load i8 *%ptr2 + %xor = xor i8 %val, %old + store i8 %xor, i8 *%ptr2 + ret void +} + +; Test the simple i16 case. +define void @f2(i16 *%ptr1) { +; CHECK-LABEL: f2: +; CHECK: xc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %old = load i16 *%ptr2 + %xor = xor i16 %val, %old + store i16 %xor, i16 *%ptr2 + ret void +} + +; Test the simple i32 case. +define void @f3(i32 *%ptr1) { +; CHECK-LABEL: f3: +; CHECK: xc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %old = load i32 *%ptr2 + %xor = xor i32 %old, %val + store i32 %xor, i32 *%ptr2 + ret void +} + +; Test the i64 case. +define void @f4(i64 *%ptr1) { +; CHECK-LABEL: f4: +; CHECK: xc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %xor = xor i64 %old, %val + store i64 %xor, i64 *%ptr2 + ret void +} + +; Leave other more complicated tests to and-08.ll. |