aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll40
-rw-r--r--test/CodeGen/ARM/ghc-tcreturn-lowered.ll21
-rw-r--r--test/CodeGen/Mips/abicalls.ll21
-rw-r--r--test/CodeGen/Mips/atomic.ll14
-rw-r--r--test/CodeGen/Mips/bswap.ll36
-rw-r--r--test/CodeGen/Mips/cconv/arguments-float.ll22
-rw-r--r--test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll22
-rw-r--r--test/CodeGen/Mips/cconv/arguments-struct.ll41
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs.ll1104
-rw-r--r--test/CodeGen/Mips/cconv/arguments.ll30
-rw-r--r--test/CodeGen/Mips/cconv/return-float.ll4
-rw-r--r--test/CodeGen/Mips/cconv/return-hard-float.ll4
-rw-r--r--test/CodeGen/Mips/cconv/return-hard-struct-f128.ll36
-rw-r--r--test/CodeGen/Mips/cconv/return-struct.ll232
-rw-r--r--test/CodeGen/Mips/cconv/return.ll4
-rwxr-xr-xtest/CodeGen/Mips/cmov.ll34
-rw-r--r--test/CodeGen/Mips/const-mult.ll8
-rw-r--r--test/CodeGen/Mips/countleading.ll4
-rw-r--r--test/CodeGen/Mips/divrem.ll12
-rw-r--r--test/CodeGen/Mips/ehframe-indirect.ll15
-rw-r--r--test/CodeGen/Mips/fastcc.ll86
-rw-r--r--test/CodeGen/Mips/fp64a.ll98
-rw-r--r--test/CodeGen/Mips/inlineasm-operand-code.ll27
-rw-r--r--test/CodeGen/Mips/load-store-left-right.ll4
-rw-r--r--test/CodeGen/Mips/longbranch.ll2
-rw-r--r--test/CodeGen/Mips/madd-msub.ll48
-rw-r--r--test/CodeGen/Mips/mips64-f128.ll4
-rw-r--r--test/CodeGen/Mips/mips64-sret.ll2
-rw-r--r--test/CodeGen/Mips/msa/frameindex.ll92
-rw-r--r--test/CodeGen/Mips/octeon_popcnt.ll6
-rw-r--r--test/CodeGen/Mips/select.ll20
-rw-r--r--test/CodeGen/Mips/zeroreg.ll4
-rw-r--r--test/CodeGen/PowerPC/blockaddress.ll26
-rw-r--r--test/CodeGen/PowerPC/cc.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-conversion.ll121
-rw-r--r--test/CodeGen/PowerPC/fast-isel-ret.ll66
-rw-r--r--test/CodeGen/PowerPC/ia-mem-r0.ll94
-rw-r--r--test/CodeGen/PowerPC/ia-neg-const.ll25
-rw-r--r--test/CodeGen/PowerPC/stack-realign.ll8
-rw-r--r--test/CodeGen/PowerPC/subreg-postra-2.ll175
-rw-r--r--test/CodeGen/PowerPC/subreg-postra.ll168
-rw-r--r--test/CodeGen/PowerPC/tls-pic.ll16
-rw-r--r--test/CodeGen/PowerPC/tls-store2.ll33
-rw-r--r--test/MC/Disassembler/Mips/mips2.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips64.txt6
-rw-r--r--test/MC/PowerPC/ppc64-localentry.s19
-rw-r--r--test/Transforms/GCOVProfiling/linezero.ll2
-rw-r--r--test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll2
-rw-r--r--test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll4
-rw-r--r--test/Transforms/IndVarSimplify/lftr-extend-const.ll4
-rw-r--r--test/Transforms/IndVarSimplify/lftr-reuse.ll24
-rw-r--r--test/Transforms/IndVarSimplify/pr20680.ll219
-rw-r--r--test/Transforms/LICM/PR21582.ll40
-rw-r--r--test/Transforms/LICM/speculate.ll10
-rw-r--r--test/Transforms/LoopVectorize/incorrect-dom-info.ll142
-rw-r--r--test/Transforms/LoopVectorize/loop-form.ll31
-rw-r--r--test/Transforms/LoopVectorize/runtime-check-address-space.ll154
-rw-r--r--test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll120
-rw-r--r--test/Transforms/LoopVectorize/unsized-pointee-crash.ll24
-rw-r--r--test/Transforms/LoopVectorize/vect.stats.ll54
-rw-r--r--test/Transforms/SROA/phi-and-select.ll34
62 files changed, 3244 insertions, 502 deletions
diff --git a/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll b/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll
new file mode 100644
index 000000000000..f1ddd13ef0d6
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; PR21622: Check for a crasher when the sum of exits to the same successor of a
+; loop overflows.
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'extremely_likely_loop_successor':
+; CHECK-NEXT: block-frequency-info: extremely_likely_loop_successor
+define void @extremely_likely_loop_successor() {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+ br label %loop
+
+; CHECK-NEXT: loop: float = 1.0,
+loop:
+ %exit.1.cond = call i1 @foo()
+ br i1 %exit.1.cond, label %exit, label %loop.2, !prof !0
+
+; CHECK-NEXT: loop.2: float = 0.0000000
+loop.2:
+ %exit.2.cond = call i1 @foo()
+ br i1 %exit.2.cond, label %exit, label %loop.3, !prof !0
+
+; CHECK-NEXT: loop.3: float = 0.0000000
+loop.3:
+ %exit.3.cond = call i1 @foo()
+ br i1 %exit.3.cond, label %exit, label %loop.4, !prof !0
+
+; CHECK-NEXT: loop.4: float = 0.0,
+loop.4:
+ %exit.4.cond = call i1 @foo()
+ br i1 %exit.4.cond, label %exit, label %loop, !prof !0
+
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+ ret void
+}
+
+declare i1 @foo()
+
+!0 = metadata !{metadata !"branch_weights", i32 4294967295, i32 1}
diff --git a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
new file mode 100644
index 000000000000..6d2564ba1ab6
--- /dev/null
+++ b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=thumbv7-eabi -o - %s | FileCheck %s
+
+declare cc 10 void @g()
+
+define cc 10 void @test_direct_tail() {
+; CHECK-LABEL: test_direct_tail:
+; CHECK: b g
+
+ tail call cc10 void @g()
+ ret void
+}
+
+@ind_func = global void()* zeroinitializer
+
+define cc 10 void @test_indirect_tail() {
+; CHECK-LABEL: test_indirect_tail:
+; CHECK: bx {{r[0-9]+}}
+ %func = load void()** @ind_func
+ tail call cc10 void()* %func()
+ ret void
+}
diff --git a/test/CodeGen/Mips/abicalls.ll b/test/CodeGen/Mips/abicalls.ll
index 6fa33aa158ad..7edc3e25c352 100644
--- a/test/CodeGen/Mips/abicalls.ll
+++ b/test/CodeGen/Mips/abicalls.ll
@@ -1,16 +1,11 @@
-;
-; When the assembler is ready a .s file for it will
-; be created.
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=STATIC %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s
-; Note that EF_MIPS_CPIC is set by -mabicalls which is the default on Linux
-; TODO need to support -mno-abicalls
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr noabicalls -relocation-model=static %s -o - | FileCheck -implicit-check-not='.abicalls' -implicit-check-not='pic0' %s
-; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-STATIC %s
-; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=CHECK-PIC %s
-; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
-; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
+; ABICALLS: .abicalls
-; CHECK-STATIC: .abicalls
-; CHECK-STATIC-NEXT: pic0
-; CHECK-PIC: .abicalls
-; CHECK-PIC-NOT: pic0
+; STATIC: pic0
+; PIC-NOT: pic0
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index f4118ecec79d..78fd8296178e 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -12,7 +12,7 @@
@x = common global i32 0, align 4
-define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
+define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind {
entry:
%0 = atomicrmw add i32* @x, i32 %incr monotonic
ret i32 %0
@@ -29,7 +29,7 @@ entry:
; ALL: beqz $[[R2]], $[[BB0]]
}
-define i32 @AtomicLoadNand32(i32 %incr) nounwind {
+define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
entry:
%0 = atomicrmw nand i32* @x, i32 %incr monotonic
ret i32 %0
@@ -47,7 +47,7 @@ entry:
; ALL: beqz $[[R2]], $[[BB0]]
}
-define i32 @AtomicSwap32(i32 %newval) nounwind {
+define i32 @AtomicSwap32(i32 signext %newval) nounwind {
entry:
%newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
@@ -66,7 +66,7 @@ entry:
; ALL: beqz $[[R2]], $[[BB0]]
}
-define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
+define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
entry:
%newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
@@ -293,7 +293,7 @@ entry:
; HAS-SEB-SEH: seb $2, $[[R17]]
}
-define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 %oldval, i8 signext %newval) nounwind {
+define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) nounwind {
entry:
%0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic
%1 = extractvalue { i8, i1 } %0, 1
@@ -381,7 +381,7 @@ entry:
@countsint = common global i32 0, align 4
-define i32 @CheckSync(i32 %v) nounwind noinline {
+define i32 @CheckSync(i32 signext %v) nounwind noinline {
entry:
%0 = atomicrmw add i32* @countsint, i32 %v seq_cst
ret i32 %0
@@ -415,7 +415,7 @@ entry:
; Check that MIPS32R6 has the correct offset range.
; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store.
-define i32 @AtomicLoadAdd32_OffGt9Bit(i32 %incr) nounwind {
+define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind {
entry:
%0 = atomicrmw add i32* getelementptr(i32* @x, i32 256), i32 %incr monotonic
ret i32 %0
diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll
index 812eef137773..f182e65b0266 100644
--- a/test/CodeGen/Mips/bswap.ll
+++ b/test/CodeGen/Mips/bswap.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 | FileCheck %s -check-prefix=MIPS16
-define i32 @bswap32(i32 %x) nounwind readnone {
+define i32 @bswap32(i32 signext %x) nounwind readnone {
entry:
; MIPS32-LABEL: bswap32:
; MIPS32: wsbh $[[R0:[0-9]+]]
@@ -29,7 +29,7 @@ entry:
ret i32 %or.3
}
-define i64 @bswap64(i64 %x) nounwind readnone {
+define i64 @bswap64(i64 signext %x) nounwind readnone {
entry:
; MIPS32-LABEL: bswap64:
; MIPS32: wsbh $[[R0:[0-9]+]]
@@ -72,24 +72,24 @@ entry:
define <4 x i32> @bswapv4i32(<4 x i32> %x) nounwind readnone {
entry:
; MIPS32-LABEL: bswapv4i32:
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
; MIPS64-LABEL: bswapv4i32:
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
; Don't bother with a MIPS16 version. It's just bswap32 repeated four times and
; would be very long
diff --git a/test/CodeGen/Mips/cconv/arguments-float.ll b/test/CodeGen/Mips/cconv/arguments-float.ll
index e2119ec08028..14a3baa7f539 100644
--- a/test/CodeGen/Mips/cconv/arguments-float.ll
+++ b/test/CodeGen/Mips/cconv/arguments-float.ll
@@ -69,26 +69,26 @@ entry:
; O32-DAG: sw [[R4]], 28([[R2]])
; NEW-DAG: sd $6, 24([[R2]])
-; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp)
-; O32-DAG: lw [[R4:\$[0-9]+]], 36($sp)
+; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp)
+; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp)
; O32-DAG: sw [[R3]], 32([[R2]])
; O32-DAG: sw [[R4]], 36([[R2]])
; NEW-DAG: sd $7, 32([[R2]])
-; O32-DAG: lw [[R3:\$[0-9]+]], 40($sp)
-; O32-DAG: lw [[R4:\$[0-9]+]], 44($sp)
+; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp)
+; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp)
; O32-DAG: sw [[R3]], 40([[R2]])
; O32-DAG: sw [[R4]], 44([[R2]])
; NEW-DAG: sd $8, 40([[R2]])
-; O32-DAG: lw [[R3:\$[0-9]+]], 48($sp)
-; O32-DAG: lw [[R4:\$[0-9]+]], 52($sp)
+; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp)
+; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp)
; O32-DAG: sw [[R3]], 48([[R2]])
; O32-DAG: sw [[R4]], 52([[R2]])
; NEW-DAG: sd $9, 48([[R2]])
-; O32-DAG: lw [[R3:\$[0-9]+]], 56($sp)
-; O32-DAG: lw [[R4:\$[0-9]+]], 60($sp)
+; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 56($sp)
+; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 60($sp)
; O32-DAG: sw [[R3]], 56([[R2]])
; O32-DAG: sw [[R4]], 60([[R2]])
; NEW-DAG: sd $10, 56([[R2]])
@@ -135,8 +135,8 @@ entry:
; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(floats)(
; The first four arguments are the same in O32/N32/N64.
-; The first argument isn't floating point so floating point registers are not
-; used.
+; The first argument is floating point but soft-float is enabled so floating
+; point registers are not used.
; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte
; aligned and occupying one slot. We'll use GCC's definition.
@@ -195,7 +195,7 @@ entry:
; O32-DAG: sw $7, 12([[R2]])
; NEW-DAG: sd $5, 8([[R2]])
-define void @float_arg2(i8 %a, float %b) nounwind {
+define void @float_arg2(i8 signext %a, float %b) nounwind {
entry:
%0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
index aadf7d18c17d..70ccf14c5450 100644
--- a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
+++ b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
@@ -4,11 +4,11 @@
; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWBE %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWLE %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWBE %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWLE %s
; Test the effect of varargs on floating point types in the non-variable part
; of the argument list as specified by section 2 of the MIPSpro N32 Handbook.
@@ -34,6 +34,7 @@ entry:
%b = va_arg i8** %ap, double
%1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
store volatile double %b, double* %1
+ call void @llvm.va_end(i8* %ap2)
ret void
}
@@ -98,6 +99,7 @@ entry:
%b = va_arg i8** %ap, float
%1 = getelementptr [11 x float]* @floats, i32 0, i32 2
store volatile float %b, float* %1
+ call void @llvm.va_end(i8* %ap2)
ret void
}
@@ -140,16 +142,18 @@ entry:
; Increment the pointer then get the varargs arg
; LLVM will rebind the load to the stack pointer instead of the varargs pointer
; during lowering. This is fine and doesn't change the behaviour.
-; N32/N64 is using ori instead of addiu/daddiu but (although odd) this is fine
-; since the stack is always aligned.
+; Also, in big-endian mode the offset must be increased by 4 to retrieve the
+; correct half of the argument slot.
+;
; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 4
; O32-DAG: sw [[VAPTR]], 4($sp)
-; N32-DAG: ori [[VAPTR]], [[VAPTR]], 4
+; N32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
; N32-DAG: sw [[VAPTR]], 4($sp)
-; N64-DAG: ori [[VAPTR]], [[VAPTR]], 4
+; N64-DAG: daddiu [[VAPTR]], [[VAPTR]], 8
; N64-DAG: sd [[VAPTR]], 0($sp)
; O32-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
-; NEW-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; NEWLE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; NEWBE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
; ALL-DAG: swc1 [[FTMP1]], 8([[R2]])
declare void @llvm.va_start(i8*)
diff --git a/test/CodeGen/Mips/cconv/arguments-struct.ll b/test/CodeGen/Mips/cconv/arguments-struct.ll
new file mode 100644
index 000000000000..c1bc84ee7a04
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-struct.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=mips-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s
+
+; RUN-TODO: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s
+; RUN-TODO: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-LE %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-LE %s
+
+; Test small structures for all ABI's and byte orders.
+;
+; N32/N64 are identical in this area so their checks have been combined into
+; the 'NEW' prefix (the N stands for New).
+
+@bytes = global [2 x i8] zeroinitializer
+
+define void @s_i8(i8 inreg %a) nounwind {
+entry:
+ store i8 %a, i8* getelementptr inbounds ([2 x i8]* @bytes, i32 0, i32 1)
+ ret void
+}
+
+; ALL-LABEL: s_i8:
+
+; SYM32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(bytes)
+; SYM32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(bytes)
+
+; SYM64-DAG: ld [[PTR:\$[0-9]+]], %got_disp(bytes)(
+
+; O32-BE-DAG: srl [[ARG:\$[0-9]+]], $4, 24
+; O32-BE-DAG: sb [[ARG]], 1([[PTR]])
+
+; O32-LE-DAG: sb $4, 1([[PTR]])
+
+; NEW-BE-DAG: dsrl [[ARG:\$[0-9]+]], $4, 56
+; NEW-BE-DAG: sb [[ARG]], 1([[PTR]])
+
+; NEW-LE-DAG: sb $4, 1([[PTR]])
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs.ll b/test/CodeGen/Mips/cconv/arguments-varargs.ll
new file mode 100644
index 000000000000..adacda5bc420
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-varargs.ll
@@ -0,0 +1,1104 @@
+; RUN: llc -mtriple=mips-linux -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s
+; RUN: llc -mtriple=mipsel-linux -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-linux -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-LE %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-BE %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-LE %s
+
+@hwords = global [3 x i16] zeroinitializer, align 1
+@words = global [3 x i32] zeroinitializer, align 1
+@dwords = global [3 x i64] zeroinitializer, align 1
+
+define void @fn_i16_dotdotdot_i16(i16 %a, ...) {
+entry:
+; ALL-LABEL: fn_i16_dotdotdot_i16:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
+
+; ALL-DAG: sh [[ARG1]], 2([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sh [[ARG2]], 4([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i16
+ %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1
+ store volatile i16 %arg1, i16* %e1, align 2
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i16
+ %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2
+ store volatile i16 %arg2, i16* %e2, align 2
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i16_dotdotdot_i32(i16 %a, ...) {
+entry:
+; ALL-LABEL: fn_i16_dotdotdot_i32:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
+
+; ALL-DAG: sw [[ARG1]], 4([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sw [[ARG2]], 8([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i32
+ %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1
+ store volatile i32 %arg1, i32* %e1, align 4
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i32
+ %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2
+ store volatile i32 %arg2, i32* %e2, align 4
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i16_dotdotdot_i64(i16 %a, ...) {
+entry:
+; ALL-LABEL: fn_i16_dotdotdot_i64:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]] (and realign pointer for O32)
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion and copy it to the global.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 8([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 12([[GV]])
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
+; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-DAG: sd [[ARG1]], 8([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion and copy it to the global.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 16([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 20([[GV]])
+
+; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-DAG: sd [[ARG2]], 16([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i64
+ %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1
+ store volatile i64 %arg1, i64* %e1, align 8
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i64
+ %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2
+ store volatile i64 %arg2, i64* %e2, align 8
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i32_dotdotdot_i16(i32 %a, ...) {
+entry:
+; ALL-LABEL: fn_i32_dotdotdot_i16:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
+
+; ALL-DAG: sh [[ARG1]], 2([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sh [[ARG2]], 4([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i16
+ %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1
+ store volatile i16 %arg1, i16* %e1, align 2
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i16
+ %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2
+ store volatile i16 %arg2, i16* %e2, align 2
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i32_dotdotdot_i32(i32 %a, ...) {
+entry:
+; ALL-LABEL: fn_i32_dotdotdot_i32:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
+
+; ALL-DAG: sw [[ARG1]], 4([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sw [[ARG2]], 8([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i32
+ %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1
+ store volatile i32 %arg1, i32* %e1, align 4
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i32
+ %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2
+ store volatile i32 %arg2, i32* %e2, align 4
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i32_dotdotdot_i64(i32 %a, ...) {
+entry:
+; ALL-LABEL: fn_i32_dotdotdot_i64:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]] (and realign pointer for O32)
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion and copy it to the global.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 8([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 12([[GV]])
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
+; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-DAG: sd [[ARG1]], 8([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion and copy it to the global.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 16([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 20([[GV]])
+
+; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-DAG: sd [[ARG2]], 16([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i64
+ %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1
+ store volatile i64 %arg1, i64* %e1, align 8
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i64
+ %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2
+ store volatile i64 %arg2, i64* %e2, align 8
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i64_dotdotdot_i16(i64 %a, ...) {
+entry:
+; ALL-LABEL: fn_i64_dotdotdot_i16:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 16 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
+; first fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
+
+; ALL-DAG: sh [[ARG1]], 2([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sh [[ARG2]], 4([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i16
+ %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1
+ store volatile i16 %arg1, i16* %e1, align 2
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i16
+ %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2
+ store volatile i16 %arg2, i16* %e2, align 2
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i64_dotdotdot_i32(i64 %a, ...) {
+entry:
+; ALL-LABEL: fn_i64_dotdotdot_i32:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 16 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
+; first fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
+
+; ALL-DAG: sw [[ARG1]], 4([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sw [[ARG2]], 8([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i32
+ %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1
+ store volatile i32 %arg1, i32* %e1, align 4
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i32
+ %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2
+ store volatile i32 %arg2, i32* %e2, align 4
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i64_dotdotdot_i64(i64 %a, ...) {
+entry:
+; ALL-LABEL: fn_i64_dotdotdot_i64:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 16 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
+; first fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]] (and realign pointer for O32)
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion and copy it to the global.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 8([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 12([[GV]])
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
+; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-DAG: sd [[ARG1]], 8([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion and copy it to the global.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 16([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 20([[GV]])
+
+; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-DAG: sd [[ARG2]], 16([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i64
+ %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1
+ store volatile i64 %arg1, i64* %e1, align 8
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i64
+ %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2
+ store volatile i64 %arg2, i64* %e2, align 8
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
diff --git a/test/CodeGen/Mips/cconv/arguments.ll b/test/CodeGen/Mips/cconv/arguments.ll
index 8fe29f3c8ce7..43da6044408b 100644
--- a/test/CodeGen/Mips/cconv/arguments.ll
+++ b/test/CodeGen/Mips/cconv/arguments.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
-; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
@@ -23,8 +23,10 @@
@floats = global [11 x float] zeroinitializer
@doubles = global [11 x double] zeroinitializer
-define void @align_to_arg_slots(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g,
- i8 %h, i8 %i, i8 %j) nounwind {
+define void @align_to_arg_slots(i8 signext %a, i8 signext %b, i8 signext %c,
+ i8 signext %d, i8 signext %e, i8 signext %f,
+ i8 signext %g, i8 signext %h, i8 signext %i,
+ i8 signext %j) nounwind {
entry:
%0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
@@ -53,7 +55,7 @@ entry:
; We won't test the way the global address is calculated in this test. This is
; just to get the register number for the other checks.
; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
-; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM64-DAG: ld [[R1:\$[0-9]+]], %got_disp(bytes)(
; The first four arguments are the same in O32/N32/N64
; ALL-DAG: sb $4, 1([[R1]])
@@ -82,15 +84,16 @@ entry:
; increase by 4 for O32 and 8 for N32/N64.
; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp)
; O32-DAG: sb [[R3]], 9([[R1]])
-; NEW-DAG: lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp)
; NEW-DAG: sb [[R3]], 9([[R1]])
; O32-DAG: lw [[R3:\$[0-9]+]], 36($sp)
; O32-DAG: sb [[R3]], 10([[R1]])
-; NEW-DAG: lw [[R3:\$[0-9]+]], 8($sp)
+; NEW-DAG: ld [[R3:\$[0-9]+]], 8($sp)
; NEW-DAG: sb [[R3]], 10([[R1]])
-define void @slot_skipping(i8 %a, i64 %b, i8 %c, i8 %d,
- i8 %e, i8 %f, i8 %g, i64 %i, i8 %j) nounwind {
+define void @slot_skipping(i8 signext %a, i64 signext %b, i8 signext %c,
+ i8 signext %d, i8 signext %e, i8 signext %f,
+ i8 signext %g, i64 signext %i, i8 signext %j) nounwind {
entry:
%0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
@@ -117,9 +120,9 @@ entry:
; We won't test the way the global address is calculated in this test. This is
; just to get the register number for the other checks.
; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
-; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM64-DAG: ld [[R1:\$[0-9]+]], %got_disp(bytes)(
; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
-; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(dwords)(
+; SYM64-DAG: ld [[R2:\$[0-9]+]], %got_disp(dwords)(
; The first argument is the same in O32/N32/N64.
; ALL-DAG: sb $4, 1([[R1]])
@@ -137,8 +140,7 @@ entry:
; It's not clear why O32 uses lbu for this argument, but it's not wrong so we'll
; accept it for now. The only IR difference is that this argument has
; anyext from i8 and align 8 on it.
-; O32LE-DAG: lbu [[R3:\$[0-9]+]], 16($sp)
-; O32BE-DAG: lbu [[R3:\$[0-9]+]], 19($sp)
+; O32-DAG: lw [[R3:\$[0-9]+]], 16($sp)
; O32-DAG: sb [[R3]], 2([[R1]])
; NEW-DAG: sb $6, 2([[R1]])
; O32-DAG: lw [[R3:\$[0-9]+]], 20($sp)
@@ -166,5 +168,5 @@ entry:
; increase by 4 for O32 and 8 for N32/N64.
; O32-DAG: lw [[R3:\$[0-9]+]], 48($sp)
; O32-DAG: sb [[R3]], 7([[R1]])
-; NEW-DAG: lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp)
; NEW-DAG: sb [[R3]], 7([[R1]])
diff --git a/test/CodeGen/Mips/cconv/return-float.ll b/test/CodeGen/Mips/cconv/return-float.ll
index 28cf83d3efcf..d1a5e4f2fa9d 100644
--- a/test/CodeGen/Mips/cconv/return-float.ll
+++ b/test/CodeGen/Mips/cconv/return-float.ll
@@ -30,7 +30,7 @@ entry:
; O32-DAG: lw $2, %lo(float)([[R1]])
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(float)
; N32-DAG: lw $2, %lo(float)([[R1]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)(
; N64-DAG: lw $2, 0([[R1]])
define double @retdouble() nounwind {
@@ -44,5 +44,5 @@ entry:
; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], %lo(double)
; O32-DAG: lw $3, 4([[R2]])
; N32-DAG: ld $2, %lo(double)([[R1:\$[0-9]+]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)(
; N64-DAG: ld $2, 0([[R1]])
diff --git a/test/CodeGen/Mips/cconv/return-hard-float.ll b/test/CodeGen/Mips/cconv/return-hard-float.ll
index 3eb26fa9d24f..123b499185a9 100644
--- a/test/CodeGen/Mips/cconv/return-hard-float.ll
+++ b/test/CodeGen/Mips/cconv/return-hard-float.ll
@@ -33,7 +33,7 @@ entry:
; O32-DAG: lwc1 $f0, %lo(float)([[R1]])
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(float)
; N32-DAG: lwc1 $f0, %lo(float)([[R1]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)(
; N64-DAG: lwc1 $f0, 0([[R1]])
define double @retdouble() nounwind {
@@ -45,7 +45,7 @@ entry:
; ALL-LABEL: retdouble:
; O32-DAG: ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
; N32-DAG: ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)(
; N64-DAG: ldc1 $f0, 0([[R1]])
define { double, double } @retComplexDouble() #0 {
diff --git a/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll b/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
new file mode 100644
index 000000000000..2e8447710281
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test return of {fp128} agrees with de-facto N32/N64 ABI.
+
+@struct_fp128 = global {fp128} zeroinitializer
+
+define inreg {fp128} @ret_struct_fp128() nounwind {
+entry:
+ %0 = load volatile {fp128}* @struct_fp128
+ ret {fp128} %0
+}
+
+; ALL-LABEL: ret_struct_fp128:
+
+; O32 generates different IR so we don't test it here. It returns the struct
+; indirectly.
+
+; Contrary to the N32/N64 ABI documentation, a struct containing a long double
+; is returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
+; match the de facto ABI as implemented by GCC.
+; N32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_fp128)
+; N32-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
+; N32-DAG: dmtc1 [[R2]], $f0
+; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128)
+; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
+; N32-DAG: dmtc1 [[R4]], $f1
+
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_fp128)($1)
+; N64-DAG: ld [[R2:\$[0-9]+]], 0([[R1]])
+; N64-DAG: dmtc1 [[R2]], $f0
+; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R1]])
+; N64-DAG: dmtc1 [[R4]], $f1
diff --git a/test/CodeGen/Mips/cconv/return-struct.ll b/test/CodeGen/Mips/cconv/return-struct.ll
new file mode 100644
index 000000000000..11a8cf032148
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-struct.ll
@@ -0,0 +1,232 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-LE %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-LE %s
+
+; Test struct returns for all ABI's and byte orders.
+
+@struct_byte = global {i8} zeroinitializer
+@struct_2byte = global {i8,i8} zeroinitializer
+@struct_3xi16 = global {[3 x i16]} zeroinitializer
+@struct_6xi32 = global {[6 x i32]} zeroinitializer
+@struct_128xi16 = global {[128 x i16]} zeroinitializer
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+define inreg {i8} @ret_struct_i8() nounwind {
+entry:
+ %0 = load volatile {i8}* @struct_byte
+ ret {i8} %0
+}
+
+; ALL-LABEL: ret_struct_i8:
+; O32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; O32-DAG: lbu $2, %lo(struct_byte)([[R1]])
+
+; N32-LE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; N32-LE-DAG: lb $2, %lo(struct_byte)([[R1]])
+
+; N32-BE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; N32-BE-DAG: lb [[R2:\$[0-9]+]], %lo(struct_byte)([[R1]])
+; N32-BE-DAG: dsll $2, [[R2]], 56
+
+; N64-LE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_byte)($1)
+; N64-LE-DAG: lb $2, 0([[R1]])
+
+; N64-BE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_byte)($1)
+; N64-BE-DAG: lb [[R2:\$[0-9]+]], 0([[R1]])
+; N64-BE-DAG: dsll $2, [[R2]], 56
+
+; This test is based on the way clang currently lowers {i8,i8} to {i16}.
+; FIXME: It should probably work for without any lowering too but this doesn't
+; work as expected. Each member gets mapped to a register rather than
+; packed into a single register.
+define inreg {i16} @ret_struct_i16() nounwind {
+entry:
+ %retval = alloca {i8,i8}, align 1
+ %0 = bitcast {i8,i8}* %retval to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i32 1, i1 false)
+ %1 = bitcast {i8,i8}* %retval to {i16}*
+ %2 = load volatile {i16}* %1
+ ret {i16} %2
+}
+
+; ALL-LABEL: ret_struct_i16:
+; O32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; O32-DAG: lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; O32-DAG: sh [[R2]], 0([[SP:\$sp]])
+; O32-DAG: lhu $2, 0([[SP:\$sp]])
+
+; N32-LE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; N32-LE-DAG: lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; N32-LE-DAG: sh [[R2]], 8([[SP:\$sp]])
+; N32-LE-DAG: lh $2, 8([[SP:\$sp]])
+
+; N32-BE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; N32-BE-DAG: lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; N32-BE-DAG: sh [[R2]], 8([[SP:\$sp]])
+; N32-BE-DAG: lh [[R3:\$[0-9]+]], 8([[SP:\$sp]])
+; N32-BE-DAG: dsll $2, [[R3]], 48
+
+; N64-LE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1)
+; N64-LE-DAG: lhu [[R2:\$[0-9]+]], 0([[R1]])
+; N64-LE-DAG: sh [[R2]], 8([[SP:\$sp]])
+; N64-LE-DAG: lh $2, 8([[SP:\$sp]])
+
+; N64-BE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1)
+; N64-BE-DAG: lhu [[R2:\$[0-9]+]], 0([[R1]])
+; N64-BE-DAG: sh [[R2]], 8([[SP:\$sp]])
+; N64-BE-DAG: lh [[R3:\$[0-9]+]], 8([[SP:\$sp]])
+; N64-BE-DAG: dsll $2, [[R3]], 48
+
+; Ensure that structures bigger than 32-bits but smaller than 64-bits are
+; also returned in the upper bits on big endian targets. Previously, these were
+; missed by the CCPromoteToType and the shift didn't happen.
+define inreg {i48} @ret_struct_3xi16() nounwind {
+entry:
+ %0 = load volatile i48* bitcast ({[3 x i16]}* @struct_3xi16 to i48*), align 2
+ %1 = insertvalue {i48} undef, i48 %0, 0
+ ret {i48} %1
+}
+
+; ALL-LABEL: ret_struct_3xi16:
+
+; O32-BE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; O32-BE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; O32-BE-DAG: lhu [[R1:\$[0-9]+]], 4([[PTR_LO]])
+; O32-BE-DAG: lw [[R2:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]])
+; O32-BE-DAG: sll [[R3:\$[0-9]+]], [[R2]], 16
+; O32-BE-DAG: or $3, [[R1]], [[R3]]
+; O32-BE-DAG: srl $2, [[R2]], 16
+
+; O32-LE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; O32-LE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; O32-LE-DAG: lhu $3, 4([[PTR_LO]])
+; O32-LE-DAG: lw $2, %lo(struct_3xi16)([[PTR_HI]])
+
+; N32-LE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; N32-LE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; N32-LE-DAG: lh [[R1:\$[0-9]+]], 4([[PTR_LO]])
+; N32-LE-DAG: lwu [[R2:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]])
+; N32-LE-DAG: dsll [[R3:\$[0-9]+]], [[R1]], 32
+; N32-LE-DAG: or $2, [[R2]], [[R3]]
+
+; N32-BE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; N32-BE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; N32-BE-DAG: lw [[R1:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]])
+; N32-BE-DAG: dsll [[R2:\$[0-9]+]], [[R1]], 16
+; N32-BE-DAG: lhu [[R3:\$[0-9]+]], 4([[PTR_LO]])
+; N32-BE-DAG: or [[R4:\$[0-9]+]], [[R3]], [[R2]]
+; N32-BE-DAG: dsll $2, [[R4]], 16
+
+; N64-LE-DAG: ld [[PTR:\$[0-9]+]], %got_disp(struct_3xi16)($1)
+; N64-LE-DAG: lh [[R1:\$[0-9]+]], 4([[PTR]])
+; N64-LE-DAG: lwu [[R2:\$[0-9]+]], 0([[PTR]])
+; N64-LE-DAG: dsll [[R3:\$[0-9]+]], [[R1]], 32
+; N64-LE-DAG: or $2, [[R2]], [[R3]]
+
+; N64-BE-DAG: ld [[PTR:\$[0-9]+]], %got_disp(struct_3xi16)($1)
+; N64-BE-DAG: lw [[R1:\$[0-9]+]], 0([[PTR]])
+; N64-BE-DAG: dsll [[R2:\$[0-9]+]], [[R1]], 16
+; N64-BE-DAG: lhu [[R3:\$[0-9]+]], 4([[PTR]])
+; N64-BE-DAG: or [[R4:\$[0-9]+]], [[R3]], [[R2]]
+; N32-BE-DAG: dsll $2, [[R4]], 16
+
+; Ensure that large structures (>128-bit) are returned indirectly.
+; We pick an extremely large structure so we don't have to match inlined memcpy's.
+define void @ret_struct_128xi16({[128 x i16]}* sret %returnval) {
+entry:
+ %0 = bitcast {[128 x i16]}* %returnval to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ({[128 x i16]}* @struct_128xi16 to i8*), i64 256, i32 2, i1 false)
+ ret void
+}
+
+; ALL-LABEL: ret_struct_128xi16:
+
+; sret pointer is already in $4
+; O32-DAG: lui [[PTR:\$[0-9]+]], %hi(struct_128xi16)
+; O32-DAG: addiu $5, [[PTR]], %lo(struct_128xi16)
+; O32: jal memcpy
+
+; sret pointer is already in $4
+; N32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_128xi16)
+; N32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_128xi16)
+; FIXME: This signext isn't necessary. Like integers, pointers are
+; but unlike integers, pointers cannot have the signext attribute.
+; N32-DAG: sll $5, [[PTR]], 0
+; N32: jal memcpy
+
+; sret pointer is already in $4
+; N64-DAG: ld $5, %got_disp(struct_128xi16)(
+; N64-DAG: ld $25, %call16(memcpy)(
+; N64: jalr $25
+
+; Ensure that large structures (>128-bit) are returned indirectly.
+; This will generate inlined memcpy's anyway so pick the smallest large
+; structure
+; This time we let the backend lower the sret argument.
+define {[6 x i32]} @ret_struct_6xi32() {
+entry:
+ %0 = load volatile {[6 x i32]}* @struct_6xi32, align 2
+ ret {[6 x i32]} %0
+}
+
+; ALL-LABEL: ret_struct_6xi32:
+
+; sret pointer is already in $4
+; O32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_6xi32)
+; O32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_6xi32)
+; O32-DAG: lw [[T0:\$[0-9]+]], %lo(struct_6xi32)([[PTR]])
+; O32-DAG: lw [[T1:\$[0-9]+]], 4([[PTR]])
+; O32-DAG: lw [[T2:\$[0-9]+]], 8([[PTR]])
+; O32-DAG: lw [[T3:\$[0-9]+]], 12([[PTR]])
+; O32-DAG: lw [[T4:\$[0-9]+]], 16([[PTR]])
+; O32-DAG: lw [[T5:\$[0-9]+]], 20([[PTR]])
+; O32-DAG: sw [[T0]], 0($4)
+; O32-DAG: sw [[T1]], 4($4)
+; O32-DAG: sw [[T2]], 8($4)
+; O32-DAG: sw [[T3]], 12($4)
+; O32-DAG: sw [[T4]], 16($4)
+; O32-DAG: sw [[T5]], 20($4)
+
+; FIXME: This signext isn't necessary. Like integers, pointers are
+; but unlike integers, pointers cannot have the signext attribute.
+; In this case we don't have anywhere to put the signext either since
+; the sret argument is invented by the backend.
+; N32-DAG: sll [[RET_PTR:\$[0-9]+]], $4, 0
+; N32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_6xi32)
+; N32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_6xi32)
+; N32-DAG: lw [[T0:\$[0-9]+]], %lo(struct_6xi32)([[PTR]])
+; N32-DAG: lw [[T1:\$[0-9]+]], 4([[PTR]])
+; N32-DAG: lw [[T2:\$[0-9]+]], 8([[PTR]])
+; N32-DAG: lw [[T3:\$[0-9]+]], 12([[PTR]])
+; N32-DAG: lw [[T4:\$[0-9]+]], 16([[PTR]])
+; N32-DAG: lw [[T5:\$[0-9]+]], 20([[PTR]])
+; N32-DAG: sw [[T0]], 0([[RET_PTR]])
+; N32-DAG: sw [[T1]], 4([[RET_PTR]])
+; N32-DAG: sw [[T2]], 8([[RET_PTR]])
+; N32-DAG: sw [[T3]], 12([[RET_PTR]])
+; N32-DAG: sw [[T4]], 16([[RET_PTR]])
+; N32-DAG: sw [[T5]], 20([[RET_PTR]])
+
+; sret pointer is already in $4
+; N64-DAG: ld [[PTR:\$[0-9]+]], %got_disp(struct_6xi32)(
+; N64-DAG: lw [[T0:\$[0-9]+]], 0([[PTR]])
+; N64-DAG: lw [[T1:\$[0-9]+]], 4([[PTR]])
+; N64-DAG: lw [[T2:\$[0-9]+]], 8([[PTR]])
+; N64-DAG: lw [[T3:\$[0-9]+]], 12([[PTR]])
+; N64-DAG: lw [[T4:\$[0-9]+]], 16([[PTR]])
+; N64-DAG: lw [[T5:\$[0-9]+]], 20([[PTR]])
+; N64-DAG: sw [[T0]], 0($4)
+; N64-DAG: sw [[T1]], 4($4)
+; N64-DAG: sw [[T2]], 8($4)
+; N64-DAG: sw [[T3]], 12($4)
+; N64-DAG: sw [[T4]], 16($4)
+; N64-DAG: sw [[T5]], 20($4)
diff --git a/test/CodeGen/Mips/cconv/return.ll b/test/CodeGen/Mips/cconv/return.ll
index 76ce5e44c4ae..63f9b5f45a18 100644
--- a/test/CodeGen/Mips/cconv/return.ll
+++ b/test/CodeGen/Mips/cconv/return.ll
@@ -33,7 +33,7 @@ entry:
; O32-DAG: lbu $2, %lo(byte)([[R1]])
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(byte)
; N32-DAG: lbu $2, %lo(byte)([[R1]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(byte)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(byte)(
; N64-DAG: lbu $2, 0([[R1]])
define i32 @reti32() nounwind {
@@ -47,7 +47,7 @@ entry:
; O32-DAG: lw $2, %lo(word)([[R1]])
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(word)
; N32-DAG: lw $2, %lo(word)([[R1]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(word)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(word)(
; N64-DAG: lw $2, 0([[R1]])
define i64 @reti64() nounwind {
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 0c13fb1adfbe..e548049ab346 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -38,7 +38,7 @@
; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
; 64-CMP-DAG: ld $2, 0($[[T2]])
-define i32* @cmov1(i32 %s) nounwind readonly {
+define i32* @cmov1(i32 signext %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
%tmp1 = load i32** @i3, align 4
@@ -78,7 +78,7 @@ entry:
; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
; 64-CMP-DAG: lw $2, 0($[[T2]])
-define i32 @cmov2(i32 %s) nounwind readonly {
+define i32 @cmov2(i32 signext %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
%tmp1 = load i32* @c, align 4
@@ -109,7 +109,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $6, $[[CC]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i32 @cmov3(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone {
entry:
%cmp = icmp eq i32 %a, 234
%cond = select i1 %cmp, i32 %b, i32 %c
@@ -142,7 +142,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $6, $[[CC]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @cmov3_ne(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i32 @cmov3_ne(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone {
entry:
%cmp = icmp ne i32 %a, 234
%cond = select i1 %cmp, i32 %b, i32 %c
@@ -179,7 +179,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $6, $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone {
+define i64 @cmov4(i32 signext %a, i64 %b, i64 %c) nounwind readnone {
entry:
%cmp = icmp eq i32 %a, 234
%cond = select i1 %cmp, i64 %b, i64 %c
@@ -220,7 +220,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $6, $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i64 @cmov4_ne(i32 %a, i64 %b, i64 %c) nounwind readnone {
+define i64 @cmov4_ne(i32 signext %a, i64 %b, i64 %c) nounwind readnone {
entry:
%cmp = icmp ne i32 %a, 234
%cond = select i1 %cmp, i64 %b, i64 %c
@@ -263,7 +263,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @slti0(i32 %a) {
+define i32 @slti0(i32 signext %a) {
entry:
%cmp = icmp sgt i32 %a, 32766
%cond = select i1 %cmp, i32 3, i32 5
@@ -302,7 +302,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @slti1(i32 %a) {
+define i32 @slti1(i32 signext %a) {
entry:
%cmp = icmp sgt i32 %a, 32767
%cond = select i1 %cmp, i32 7, i32 5
@@ -337,7 +337,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @slti2(i32 %a) {
+define i32 @slti2(i32 signext %a) {
entry:
%cmp = icmp sgt i32 %a, -32769
%cond = select i1 %cmp, i32 3, i32 5
@@ -380,7 +380,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @slti3(i32 %a) {
+define i32 @slti3(i32 signext %a) {
entry:
%cmp = icmp sgt i32 %a, -32770
%cond = select i1 %cmp, i32 3, i32 5
@@ -567,7 +567,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @sltiu0(i32 %a) {
+define i32 @sltiu0(i32 signext %a) {
entry:
%cmp = icmp ugt i32 %a, 32766
%cond = select i1 %cmp, i32 3, i32 5
@@ -606,7 +606,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @sltiu1(i32 %a) {
+define i32 @sltiu1(i32 signext %a) {
entry:
%cmp = icmp ugt i32 %a, 32767
%cond = select i1 %cmp, i32 7, i32 5
@@ -641,7 +641,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @sltiu2(i32 %a) {
+define i32 @sltiu2(i32 signext %a) {
entry:
%cmp = icmp ugt i32 %a, -32769
%cond = select i1 %cmp, i32 3, i32 5
@@ -684,7 +684,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @sltiu3(i32 %a) {
+define i32 @sltiu3(i32 signext %a) {
entry:
%cmp = icmp ugt i32 %a, -32770
%cond = select i1 %cmp, i32 3, i32 5
@@ -697,7 +697,7 @@ entry:
; doesn't generate conditional moves
; for constant operands whose difference is |1|
-define i32 @slti4(i32 %a) nounwind readnone {
+define i32 @slti4(i32 signext %a) nounwind readnone {
%1 = icmp slt i32 %a, 7
%2 = select i1 %1, i32 4, i32 3
ret i32 %2
@@ -723,7 +723,7 @@ define i32 @slti4(i32 %a) nounwind readnone {
; 64-CMP-NOT: seleqz
; 64-CMP-NOT: selnez
-define i32 @slti5(i32 %a) nounwind readnone {
+define i32 @slti5(i32 signext %a) nounwind readnone {
%1 = icmp slt i32 %a, 7
%2 = select i1 %1, i32 -3, i32 -4
ret i32 %2
@@ -749,7 +749,7 @@ define i32 @slti5(i32 %a) nounwind readnone {
; 64-CMP-NOT: seleqz
; 64-CMP-NOT: selnez
-define i32 @slti6(i32 %a) nounwind readnone {
+define i32 @slti6(i32 signext %a) nounwind readnone {
%1 = icmp slt i32 %a, 7
%2 = select i1 %1, i32 3, i32 4
ret i32 %2
diff --git a/test/CodeGen/Mips/const-mult.ll b/test/CodeGen/Mips/const-mult.ll
index 186202141dcb..60b2a88196bd 100644
--- a/test/CodeGen/Mips/const-mult.ll
+++ b/test/CodeGen/Mips/const-mult.ll
@@ -5,7 +5,7 @@
; CHECK: sll $[[R0:[0-9]+]], $4, 2
; CHECK: addu ${{[0-9]+}}, $[[R0]], $4
-define i32 @mul5_32(i32 %a) {
+define i32 @mul5_32(i32 signext %a) {
entry:
%mul = mul nsw i32 %a, 5
ret i32 %mul
@@ -17,7 +17,7 @@ entry:
; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 5
; CHECK: subu ${{[0-9]+}}, $[[R2]], $[[R1]]
-define i32 @mul27_32(i32 %a) {
+define i32 @mul27_32(i32 signext %a) {
entry:
%mul = mul nsw i32 %a, 27
ret i32 %mul
@@ -29,7 +29,7 @@ entry:
; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 31
; CHECK: addu ${{[0-9]+}}, $[[R2]], $[[R1]]
-define i32 @muln2147483643_32(i32 %a) {
+define i32 @muln2147483643_32(i32 signext %a) {
entry:
%mul = mul nsw i32 %a, -2147483643
ret i32 %mul
@@ -41,7 +41,7 @@ entry:
; CHECK64-DAG: dsll $[[R2:[0-9]+]], $4, 63
; CHECK64: daddu ${{[0-9]+}}, $[[R2]], $[[R1]]
-define i64 @muln9223372036854775805_64(i64 %a) {
+define i64 @muln9223372036854775805_64(i64 signext %a) {
entry:
%mul = mul nsw i64 %a, -9223372036854775805
ret i64 %mul
diff --git a/test/CodeGen/Mips/countleading.ll b/test/CodeGen/Mips/countleading.ll
index 6e63cff123cf..b7aad049e8ab 100644
--- a/test/CodeGen/Mips/countleading.ll
+++ b/test/CodeGen/Mips/countleading.ll
@@ -11,7 +11,7 @@
; MIPS32-GT-R1 - MIPS64r1 and above (does not include MIPS64's)
; MIPS64-GT-R1 - MIPS64r1 and above
-define i32 @ctlz_i32(i32 %X) nounwind readnone {
+define i32 @ctlz_i32(i32 signext %X) nounwind readnone {
entry:
; ALL-LABEL: ctlz_i32:
@@ -27,7 +27,7 @@ entry:
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
-define i32 @ctlo_i32(i32 %X) nounwind readnone {
+define i32 @ctlo_i32(i32 signext %X) nounwind readnone {
entry:
; ALL-LABEL: ctlo_i32:
diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll
index 97f836044406..a9cfe0fa1523 100644
--- a/test/CodeGen/Mips/divrem.ll
+++ b/test/CodeGen/Mips/divrem.ll
@@ -27,7 +27,7 @@
@g0 = common global i32 0, align 4
@g1 = common global i32 0, align 4
-define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @sdiv1(i32 signext %a0, i32 signext %a1) nounwind readnone {
entry:
; ALL-LABEL: sdiv1:
@@ -54,7 +54,7 @@ entry:
ret i32 %div
}
-define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @srem1(i32 signext %a0, i32 signext %a1) nounwind readnone {
entry:
; ALL-LABEL: srem1:
@@ -81,7 +81,7 @@ entry:
ret i32 %rem
}
-define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @udiv1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
entry:
; ALL-LABEL: udiv1:
@@ -107,7 +107,7 @@ entry:
ret i32 %div
}
-define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @urem1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
entry:
; ALL-LABEL: urem1:
@@ -134,7 +134,7 @@ entry:
ret i32 %rem
}
-define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+define i32 @sdivrem1(i32 signext %a0, i32 signext %a1, i32* nocapture %r) nounwind {
entry:
; ALL-LABEL: sdivrem1:
@@ -175,7 +175,7 @@ entry:
ret i32 %div
}
-define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+define i32 @udivrem1(i32 zeroext %a0, i32 zeroext %a1, i32* nocapture %r) nounwind {
entry:
; ALL-LABEL: udivrem1:
diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll
index e78497a9521e..b4efb40b6422 100644
--- a/test/CodeGen/Mips/ehframe-indirect.ll
+++ b/test/CodeGen/Mips/ehframe-indirect.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck %s
-; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck -check-prefix=CHECK32 %s
+; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck -check-prefix=CHECK32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu < %s | FileCheck -check-prefix=CHECK64 %s
+; RUN: llc -mtriple=mips64el-linux-android < %s | FileCheck -check-prefix=CHECK64 %s
define i32 @main() {
; CHECK: .cfi_startproc
@@ -27,8 +29,11 @@ declare void @foo()
; CHECK: .hidden DW.ref.__gxx_personality_v0
; CHECK: .weak DW.ref.__gxx_personality_v0
; CHECK: .section .data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
-; CHECK: .align 2
+; CHECK32: .align 2
+; CHECK64: .align 3
; CHECK: .type DW.ref.__gxx_personality_v0,@object
-; CHECK: .size DW.ref.__gxx_personality_v0, 4
+; CHECK32: .size DW.ref.__gxx_personality_v0, 4
+; CHECK64: .size DW.ref.__gxx_personality_v0, 8
; CHECK: DW.ref.__gxx_personality_v0:
-; CHECK: .4byte __gxx_personality_v0
+; CHECK32: .4byte __gxx_personality_v0
+; CHECK64: .8byte __gxx_personality_v0
diff --git a/test/CodeGen/Mips/fastcc.ll b/test/CodeGen/Mips/fastcc.ll
index 822902c27d2f..6b022c5e36d9 100644
--- a/test/CodeGen/Mips/fastcc.ll
+++ b/test/CodeGen/Mips/fastcc.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=mipsel-none-nacl-gnu \
; RUN: | FileCheck %s -check-prefix=CHECK-NACL
; RUN: llc < %s -march=mipsel -mcpu=mips32 -mattr=+nooddspreg | FileCheck %s -check-prefix=NOODDSPREG
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -mattr=+fp64,+nooddspreg | FileCheck %s -check-prefix=FP64-NOODDSPREG
@gi0 = external global i32
@@ -82,6 +83,7 @@
@g16 = external global i32
@fa = common global [11 x float] zeroinitializer, align 4
+@da = common global [11 x double] zeroinitializer, align 8
define void @caller0() nounwind {
entry:
@@ -270,7 +272,7 @@ entry:
define void @caller2() {
entry:
-; NOODDSPREG-LABEL: caller2
+; NOODDSPREG-LABEL: caller2:
; Check that first 10 arguments are passed in even float registers
; f0, f2, ... , f18. Check that 11th argument is passed on stack.
@@ -312,7 +314,7 @@ define fastcc void @callee2(float %a0, float %a1, float %a2, float %a3,
float %a8, float %a9, float %a10) {
entry:
-; NOODDSPREG-LABEL: callee2
+; NOODDSPREG-LABEL: callee2:
; NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]]
@@ -348,3 +350,83 @@ entry:
ret void
}
+define void @caller3() {
+entry:
+
+; FP64-NOODDSPREG-LABEL: caller3:
+
+; Check that first 10 arguments are passed in even float registers
+; f0, f2, ... , f18. Check that 11th argument is passed on stack.
+
+; FP64-NOODDSPREG-DAG: lw $[[R0:[0-9]+]], %got(da)(${{[0-9]+|gp}})
+; FP64-NOODDSPREG-DAG: ldc1 $f0, 0($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f2, 8($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f4, 16($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f6, 24($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f8, 32($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f10, 40($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f12, 48($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f14, 56($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f16, 64($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f18, 72($[[R0]])
+
+; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], 80($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 0($sp)
+
+ %0 = load double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
+ %1 = load double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
+ %2 = load double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
+ %3 = load double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
+ %4 = load double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
+ %5 = load double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
+ %6 = load double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
+ %7 = load double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
+ %8 = load double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
+ %9 = load double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
+ %10 = load double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
+ tail call fastcc void @callee3(double %0, double %1, double %2, double %3,
+ double %4, double %5, double %6, double %7,
+ double %8, double %9, double %10)
+ ret void
+}
+
+define fastcc void @callee3(double %a0, double %a1, double %a2, double %a3,
+ double %a4, double %a5, double %a6, double %a7,
+ double %a8, double %a9, double %a10) {
+entry:
+
+; FP64-NOODDSPREG-LABEL: callee3:
+
+; FP64-NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]]
+
+; Check that first 10 arguments are received in even float registers
+; f0, f2, ... , f18. Check that 11th argument is received on stack.
+
+; FP64-NOODDSPREG-DAG: lw $[[R0:[0-9]+]], %got(da)(${{[0-9]+|gp}})
+; FP64-NOODDSPREG-DAG: sdc1 $f0, 0($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f2, 8($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f4, 16($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f6, 24($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f8, 32($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f10, 40($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f12, 48($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f14, 56($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f16, 64($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f18, 72($[[R0]])
+
+; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
+; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 80($[[R0]])
+
+ store double %a0, double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
+ store double %a1, double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
+ store double %a2, double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
+ store double %a3, double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
+ store double %a4, double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
+ store double %a5, double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
+ store double %a6, double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
+ store double %a7, double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
+ store double %a8, double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
+ store double %a9, double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
+ store double %a10, double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
+ ret void
+}
diff --git a/test/CodeGen/Mips/fp64a.ll b/test/CodeGen/Mips/fp64a.ll
index 5c2c87373a32..fadce5cb748b 100644
--- a/test/CodeGen/Mips/fp64a.ll
+++ b/test/CodeGen/Mips/fp64a.ll
@@ -12,9 +12,9 @@
; this check here.
; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-BE
-; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-BE
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A
; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-LE
-; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-LE
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A
; RUN: llc -march=mips64 -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A
; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A
@@ -38,15 +38,10 @@ define double @call1(double %d, ...) {
; 32R2-NO-FP64A-BE: mtc1 $5, $f0
; 32R2-NO-FP64A-BE: mthc1 $4, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $4, 0($sp)
-; 32R2-FP64A-LE: sw $5, 4($sp)
-; 32R2-FP64A-LE: ldc1 $f0, 0($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $5, 0($sp)
-; 32R2-FP64A-BE: sw $4, 4($sp)
-; 32R2-FP64A-BE: ldc1 $f0, 0($sp)
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $4, 0($sp)
+; 32R2-FP64A: sw $5, 4($sp)
+; 32R2-FP64A: ldc1 $f0, 0($sp)
; 64-NO-FP64A: daddiu $sp, $sp, -64
; 64-NO-FP64A: mov.d $f0, $f12
@@ -63,15 +58,10 @@ define double @call2(i32 %i, double %d) {
; 32R2-NO-FP64A-BE: mtc1 $7, $f0
; 32R2-NO-FP64A-BE: mthc1 $6, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $6, 0($sp)
-; 32R2-FP64A-LE: sw $7, 4($sp)
-; 32R2-FP64A-LE: ldc1 $f0, 0($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $7, 0($sp)
-; 32R2-FP64A-BE: sw $6, 4($sp)
-; 32R2-FP64A-BE: ldc1 $f0, 0($sp)
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $6, 0($sp)
+; 32R2-FP64A: sw $7, 4($sp)
+; 32R2-FP64A: ldc1 $f0, 0($sp)
; 64-NO-FP64A-NOT: daddiu $sp, $sp
; 64-NO-FP64A: mov.d $f0, $f13
@@ -88,15 +78,10 @@ define double @call3(float %f1, float %f2, double %d) {
; 32R2-NO-FP64A-BE: mtc1 $7, $f0
; 32R2-NO-FP64A-BE: mthc1 $6, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $6, 0($sp)
-; 32R2-FP64A-LE: sw $7, 4($sp)
-; 32R2-FP64A-LE: ldc1 $f0, 0($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $7, 0($sp)
-; 32R2-FP64A-BE: sw $6, 4($sp)
-; 32R2-FP64A-BE: ldc1 $f0, 0($sp)
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $6, 0($sp)
+; 32R2-FP64A: sw $7, 4($sp)
+; 32R2-FP64A: ldc1 $f0, 0($sp)
; 64-NO-FP64A-NOT: daddiu $sp, $sp
; 64-NO-FP64A: mov.d $f0, $f14
@@ -113,15 +98,10 @@ define double @call4(float %f, double %d, ...) {
; 32R2-NO-FP64A-BE: mtc1 $7, $f0
; 32R2-NO-FP64A-BE: mthc1 $6, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $6, 0($sp)
-; 32R2-FP64A-LE: sw $7, 4($sp)
-; 32R2-FP64A-LE: ldc1 $f0, 0($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $7, 0($sp)
-; 32R2-FP64A-BE: sw $6, 4($sp)
-; 32R2-FP64A-BE: ldc1 $f0, 0($sp)
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $6, 0($sp)
+; 32R2-FP64A: sw $7, 4($sp)
+; 32R2-FP64A: ldc1 $f0, 0($sp)
; 64-NO-FP64A: daddiu $sp, $sp, -48
; 64-NO-FP64A: mov.d $f0, $f13
@@ -145,23 +125,14 @@ define double @call5(double %a, double %b, ...) {
; 32R2-NO-FP64A-BE-DAG: mthc1 $6, $[[T1:f[0-9]+]]
; 32R2-NO-FP64A-BE: sub.d $f0, $[[T0]], $[[T1]]
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $6, 0($sp)
-; 32R2-FP64A-LE: sw $7, 4($sp)
-; 32R2-FP64A-LE: ldc1 $[[T1:f[0-9]+]], 0($sp)
-; 32R2-FP64A-LE: sw $4, 0($sp)
-; 32R2-FP64A-LE: sw $5, 4($sp)
-; 32R2-FP64A-LE: ldc1 $[[T0:f[0-9]+]], 0($sp)
-; 32R2-FP64A-LE: sub.d $f0, $[[T0]], $[[T1]]
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $7, 0($sp)
-; 32R2-FP64A-BE: sw $6, 4($sp)
-; 32R2-FP64A-BE: ldc1 $[[T1:f[0-9]+]], 0($sp)
-; 32R2-FP64A-BE: sw $5, 0($sp)
-; 32R2-FP64A-BE: sw $4, 4($sp)
-; 32R2-FP64A-BE: ldc1 $[[T0:f[0-9]+]], 0($sp)
-; 32R2-FP64A-BE: sub.d $f0, $[[T0]], $[[T1]]
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $6, 0($sp)
+; 32R2-FP64A: sw $7, 4($sp)
+; 32R2-FP64A: ldc1 $[[T1:f[0-9]+]], 0($sp)
+; 32R2-FP64A: sw $4, 0($sp)
+; 32R2-FP64A: sw $5, 4($sp)
+; 32R2-FP64A: ldc1 $[[T0:f[0-9]+]], 0($sp)
+; 32R2-FP64A: sub.d $f0, $[[T0]], $[[T1]]
; 64-NO-FP64A: sub.d $f0, $f12, $f13
}
@@ -179,19 +150,12 @@ define double @move_from(double %d) {
; 32R2-NO-FP64A-BE-DAG: mfc1 $7, $f0
; 32R2-NO-FP64A-BE-DAG: mfhc1 $6, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -32
-; 32R2-FP64A-LE: sdc1 $f0, 16($sp)
-; 32R2-FP64A-LE: lw $6, 16($sp)
-; FIXME: This store is redundant
-; 32R2-FP64A-LE: sdc1 $f0, 16($sp)
-; 32R2-FP64A-LE: lw $7, 20($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -32
-; 32R2-FP64A-BE: sdc1 $f0, 16($sp)
-; 32R2-FP64A-BE: lw $6, 20($sp)
+; 32R2-FP64A: addiu $sp, $sp, -32
+; 32R2-FP64A: sdc1 $f0, 16($sp)
+; 32R2-FP64A: lw $6, 16($sp)
; FIXME: This store is redundant
-; 32R2-FP64A-BE: sdc1 $f0, 16($sp)
-; 32R2-FP64A-BE: lw $7, 16($sp)
+; 32R2-FP64A: sdc1 $f0, 16($sp)
+; 32R2-FP64A: lw $7, 20($sp)
; 64-NO-FP64A: mov.d $f13, $f0
}
diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll
index 6512851a11be..3d9dec76fb37 100644
--- a/test/CodeGen/Mips/inlineasm-operand-code.ll
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -65,6 +65,33 @@ entry:
;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},$0
;CHECK_LITTLE_32: #NO_APP
tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
+
+; z with non-zero and the "r"(register) and "J"(integer zero) constraints
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+ call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 7) nounwind
+
+; z with zero and the "r"(register) and "J"(integer zero) constraints
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: mtc0 $0, ${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+ call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 0) nounwind
+
+; z with non-zero and just the "r"(register) constraint
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+ call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 7) nounwind
+
+; z with zero and just the "r"(register) constraint
+; FIXME: Check for $0, instead of other registers.
+; We should be using $0 directly in this case, not real registers.
+; When the materialization of 0 gets fixed, this test will fail.
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+ call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 0) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll
index a3f5ebfb5460..f6d0e8debb36 100644
--- a/test/CodeGen/Mips/load-store-left-right.ll
+++ b/test/CodeGen/Mips/load-store-left-right.ll
@@ -47,7 +47,7 @@ entry:
ret i32 %0
}
-define void @store_SI(i32 %a) nounwind {
+define void @store_SI(i32 signext %a) nounwind {
entry:
; ALL-LABEL: store_SI:
@@ -201,7 +201,7 @@ entry:
ret void
}
-define void @store_SI_trunc_from_i64(i32 %a) nounwind {
+define void @store_SI_trunc_from_i64(i32 signext %a) nounwind {
entry:
; ALL-LABEL: store_SI_trunc_from_i64:
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index a403744c8fd5..b9b52be01dad 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -13,7 +13,7 @@
@x = external global i32
-define void @test1(i32 %s) {
+define void @test1(i32 signext %s) {
entry:
%cmp = icmp eq i32 %s, 0
br i1 %cmp, label %end, label %then
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
index 82229677ff11..b0c3ff6ff9b5 100644
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -76,26 +76,14 @@ entry:
; 32R6-DAG: muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
; 32R6-DAG: addu $2, $[[T3]], $[[T2]]
-; 64-DAG: dsll $[[T0:[0-9]+]], $4, 32
-; 64-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64-DAG: dsll $[[T2:[0-9]+]], $5, 32
-; 64-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64-DAG: d[[m:m]]ult $[[T3]], $[[T1]]
-; 64-DAG: [[m]]flo $[[T4:[0-9]+]]
-; 64-DAG: dsll $[[T5:[0-9]+]], $6, 32
-; 64-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64-DAG: daddu $2, $[[T4]], $[[T6]]
+; 64-DAG: d[[m:m]]ult $5, $4
+; 64-DAG: [[m]]flo $[[T0:[0-9]+]]
+; 64-DAG: daddu $2, $[[T0]], $6
-; 64R6-DAG: dsll $[[T0:[0-9]+]], $4, 32
-; 64R6-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64R6-DAG: dsll $[[T2:[0-9]+]], $5, 32
-; 64R6-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64R6-DAG: dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
-; 64R6-DAG: dsll $[[T5:[0-9]+]], $6, 32
-; 64R6-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64R6-DAG: daddu $2, $[[T4]], $[[T6]]
+; 64R6-DAG: dmul $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG: daddu $2, $[[T0]], $6
-define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i64 @madd2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
entry:
%conv = zext i32 %a to i64
%conv2 = zext i32 %b to i64
@@ -214,26 +202,14 @@ entry:
; 32R6-DAG: negu $2, $[[T3]]
; 32R6-DAG: subu $3, $6, $[[T1]]
-; 64-DAG: dsll $[[T0:[0-9]+]], $4, 32
-; 64-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64-DAG: dsll $[[T2:[0-9]+]], $5, 32
-; 64-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64-DAG: d[[m:m]]ult $[[T3]], $[[T1]]
-; 64-DAG: [[m]]flo $[[T4:[0-9]+]]
-; 64-DAG: dsll $[[T5:[0-9]+]], $6, 32
-; 64-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64-DAG: dsubu $2, $[[T6]], $[[T4]]
+; 64-DAG: d[[m:m]]ult $5, $4
+; 64-DAG: [[m]]flo $[[T0:[0-9]+]]
+; 64-DAG: dsubu $2, $6, $[[T0]]
-; 64R6-DAG: dsll $[[T0:[0-9]+]], $4, 32
-; 64R6-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64R6-DAG: dsll $[[T2:[0-9]+]], $5, 32
-; 64R6-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64R6-DAG: dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
-; 64R6-DAG: dsll $[[T5:[0-9]+]], $6, 32
-; 64R6-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64R6-DAG: dsubu $2, $[[T6]], $[[T4]]
+; 64R6-DAG: dmul $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG: dsubu $2, $6, $[[T0]]
-define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
entry:
%conv = zext i32 %c to i64
%conv2 = zext i32 %a to i64
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
index 7f7d515d690e..f0cbbd08d79a 100644
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -114,7 +114,7 @@ entry:
; ALL-LABEL: conv_LD_UInt:
; ALL: ld $25, %call16(__floatunsitf)
-define fp128 @conv_LD_UInt(i32 %a) {
+define fp128 @conv_LD_UInt(i32 signext %a) {
entry:
%conv = uitofp i32 %a to fp128
ret fp128 %conv
@@ -635,7 +635,7 @@ entry:
; CMP_CC_FMT-DAG: selnez $[[NE2:[0-9]+]], $7, $[[CC]]
; CMP_CC_FMT-DAG: or $4, $[[NE2]], $[[EQ2]]
-define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) {
+define fp128 @select_LD(i32 signext %a, i64, fp128 %b, fp128 %c) {
entry:
%tobool = icmp ne i32 %a, 0
%cond = select i1 %tobool, fp128 %b, fp128 %c
diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll
index 7a52c3d41d69..ed494e965b7d 100644
--- a/test/CodeGen/Mips/mips64-sret.ll
+++ b/test/CodeGen/Mips/mips64-sret.ll
@@ -11,7 +11,7 @@ entry:
ret void
}
-define void @bar(i32 %v, i32* noalias sret %agg.result) nounwind {
+define void @bar(i32 signext %v, i32* noalias sret %agg.result) nounwind {
entry:
; CHECK-LABEL: bar:
; CHECK: sw $4, 0($5)
diff --git a/test/CodeGen/Mips/msa/frameindex.ll b/test/CodeGen/Mips/msa/frameindex.ll
index 07e67bf04287..ebec465a3e33 100644
--- a/test/CodeGen/Mips/msa/frameindex.ll
+++ b/test/CodeGen/Mips/msa/frameindex.ll
@@ -36,10 +36,10 @@ define void @loadstore_v16i8_just_over_simm10() nounwind {
%2 = alloca [497 x i8] ; Push the frame just over 512 bytes
%3 = load volatile <16 x i8>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
; MIPS32-AE: st.b [[R1]], 0([[BASE]])
ret void
@@ -53,12 +53,12 @@ define void @loadstore_v16i8_just_under_simm16() nounwind {
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
%3 = load volatile <16 x i8>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.b [[R1]], 0([[BASE]])
ret void
@@ -72,12 +72,12 @@ define void @loadstore_v16i8_just_over_simm16() nounwind {
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
%3 = load volatile <16 x i8>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.b [[R1]], 0([[BASE]])
ret void
@@ -107,10 +107,10 @@ define void @loadstore_v8i16_unaligned() nounwind {
%5 = getelementptr [2 x <8 x i16>]* %4, i32 0, i32 0
%6 = load volatile <8 x i16>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %6, <8 x i16>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
ret void
@@ -139,10 +139,10 @@ define void @loadstore_v8i16_just_over_simm10() nounwind {
%2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes
%3 = load volatile <8 x i16>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %3, <8 x i16>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
ret void
@@ -156,12 +156,12 @@ define void @loadstore_v8i16_just_under_simm16() nounwind {
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
%3 = load volatile <8 x i16>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %3, <8 x i16>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
ret void
@@ -175,12 +175,12 @@ define void @loadstore_v8i16_just_over_simm16() nounwind {
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
%3 = load volatile <8 x i16>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %3, <8 x i16>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
ret void
@@ -210,10 +210,10 @@ define void @loadstore_v4i32_unaligned() nounwind {
%5 = getelementptr [2 x <4 x i32>]* %4, i32 0, i32 0
%6 = load volatile <4 x i32>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %6, <4 x i32>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
ret void
@@ -242,10 +242,10 @@ define void @loadstore_v4i32_just_over_simm10() nounwind {
%2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes
%3 = load volatile <4 x i32>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %3, <4 x i32>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
ret void
@@ -259,12 +259,12 @@ define void @loadstore_v4i32_just_under_simm16() nounwind {
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
%3 = load volatile <4 x i32>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %3, <4 x i32>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
ret void
@@ -278,12 +278,12 @@ define void @loadstore_v4i32_just_over_simm16() nounwind {
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
%3 = load volatile <4 x i32>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %3, <4 x i32>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
ret void
@@ -313,10 +313,10 @@ define void @loadstore_v2i64_unaligned() nounwind {
%5 = getelementptr [2 x <2 x i64>]* %4, i32 0, i32 0
%6 = load volatile <2 x i64>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %6, <2 x i64>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
ret void
@@ -345,10 +345,10 @@ define void @loadstore_v2i64_just_over_simm10() nounwind {
%2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes
%3 = load volatile <2 x i64>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %3, <2 x i64>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
ret void
@@ -362,12 +362,12 @@ define void @loadstore_v2i64_just_under_simm16() nounwind {
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
%3 = load volatile <2 x i64>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %3, <2 x i64>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
ret void
@@ -381,12 +381,12 @@ define void @loadstore_v2i64_just_over_simm16() nounwind {
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
%3 = load volatile <2 x i64>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %3, <2 x i64>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
ret void
diff --git a/test/CodeGen/Mips/octeon_popcnt.ll b/test/CodeGen/Mips/octeon_popcnt.ll
index 52c37f69d020..3432b3992984 100644
--- a/test/CodeGen/Mips/octeon_popcnt.ll
+++ b/test/CodeGen/Mips/octeon_popcnt.ll
@@ -6,7 +6,7 @@ define i8 @cnt8(i8 %x) nounwind readnone {
ret i8 %cnt
; OCTEON-LABEL: cnt8:
; OCTEON: jr $ra
-; OCTEON: pop $2, $1
+; OCTEON: pop $2, [[R1:\$[0-9]+]]
; MIPS64-LABEL: cnt8:
; MIPS64-NOT: pop
}
@@ -16,12 +16,12 @@ define i16 @cnt16(i16 %x) nounwind readnone {
ret i16 %cnt
; OCTEON-LABEL: cnt16:
; OCTEON: jr $ra
-; OCTEON: pop $2, $1
+; OCTEON: pop $2, [[R1:\$[0-9]+]]
; MIPS64-LABEL: cnt16:
; MIPS64-NOT: pop
}
-define i32 @cnt32(i32 %x) nounwind readnone {
+define i32 @cnt32(i32 zeroext %x) nounwind readnone {
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
; OCTEON-LABEL: cnt32:
diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll
index eb2198b36dff..d6e1826c30c8 100644
--- a/test/CodeGen/Mips/select.ll
+++ b/test/CodeGen/Mips/select.ll
@@ -8,7 +8,7 @@
@d2 = external global double
@d3 = external global double
-define i32 @i32_icmp_ne_i32_val(i32 %s, i32 %f0, i32 %f1) nounwind readnone {
+define i32 @i32_icmp_ne_i32_val(i32 signext %s, i32 signext %f0, i32 signext %f1) nounwind readnone {
entry:
; ALL-LABEL: i32_icmp_ne_i32_val:
@@ -37,7 +37,7 @@ entry:
ret i32 %cond
}
-define i64 @i32_icmp_ne_i64_val(i32 %s, i64 %f0, i64 %f1) nounwind readnone {
+define i64 @i32_icmp_ne_i64_val(i32 signext %s, i64 %f0, i64 %f1) nounwind readnone {
entry:
; ALL-LABEL: i32_icmp_ne_i64_val:
@@ -128,7 +128,7 @@ entry:
ret i64 %cond
}
-define float @i32_icmp_ne_f32_val(i32 %s, float %f0, float %f1) nounwind readnone {
+define float @i32_icmp_ne_f32_val(i32 signext %s, float %f0, float %f1) nounwind readnone {
entry:
; ALL-LABEL: i32_icmp_ne_f32_val:
@@ -161,7 +161,7 @@ entry:
ret float %cond
}
-define double @i32_icmp_ne_f64_val(i32 %s, double %f0, double %f1) nounwind readnone {
+define double @i32_icmp_ne_f64_val(i32 signext %s, double %f0, double %f1) nounwind readnone {
entry:
; ALL-LABEL: i32_icmp_ne_f64_val:
@@ -496,7 +496,7 @@ entry:
ret float %cond
}
-define i32 @f32_fcmp_oeq_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
entry:
; ALL-LABEL: f32_fcmp_oeq_i32_val:
@@ -541,7 +541,7 @@ entry:
ret i32 %cond
}
-define i32 @f32_fcmp_olt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
entry:
; ALL-LABEL: f32_fcmp_olt_i32_val:
@@ -585,7 +585,7 @@ entry:
ret i32 %cond
}
-define i32 @f32_fcmp_ogt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
entry:
; ALL-LABEL: f32_fcmp_ogt_i32_val:
@@ -630,7 +630,7 @@ entry:
ret i32 %cond
}
-define i32 @f64_fcmp_oeq_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
entry:
; ALL-LABEL: f64_fcmp_oeq_i32_val:
@@ -707,7 +707,7 @@ entry:
ret i32 %cond
}
-define i32 @f64_fcmp_olt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
entry:
; ALL-LABEL: f64_fcmp_olt_i32_val:
@@ -784,7 +784,7 @@ entry:
ret i32 %cond
}
-define i32 @f64_fcmp_ogt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
entry:
; ALL-LABEL: f64_fcmp_ogt_i32_val:
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
index a1b6cb0322b1..c766d3b3cc2a 100644
--- a/test/CodeGen/Mips/zeroreg.ll
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -8,7 +8,7 @@
@g1 = external global i32
-define i32 @sel_icmp_nez_i32_z0(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z0(i32 signext %s) nounwind readonly {
entry:
; ALL-LABEL: sel_icmp_nez_i32_z0:
@@ -30,7 +30,7 @@ entry:
ret i32 %cond
}
-define i32 @sel_icmp_nez_i32_z1(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z1(i32 signext %s) nounwind readonly {
entry:
; ALL-LABEL: sel_icmp_nez_i32_z1:
diff --git a/test/CodeGen/PowerPC/blockaddress.ll b/test/CodeGen/PowerPC/blockaddress.ll
new file mode 100644
index 000000000000..c1981e21fff4
--- /dev/null
+++ b/test/CodeGen/PowerPC/blockaddress.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -code-model=small -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=SMALL
+; RUN: llc < %s -code-model=medium -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+; RUN: llc < %s -code-model=large -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+; RUN: llc < %s -code-model=small -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=SMALL
+; RUN: llc < %s -code-model=medium -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+; RUN: llc < %s -code-model=large -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+
+define i8* @test() {
+entry:
+ br label %here
+
+here: ; preds = %entry
+; MEDIUM: .Ltmp[[TMP0:[0-9]+]]:
+; MEDIUM: addis [[R0:[0-9]+]], 2, .LC[[LC0:[0-9]+]]@toc@ha
+; MEDIUM: ld 3, .LC[[LC0]]@toc@l([[R0]])
+; MEDIUM: blr
+; MEDIUM: .LC[[LC0]]:
+; MEDIUM: .tc .Ltmp[[TMP0]][TC],.Ltmp[[TMP0]]
+; SMALL: .Ltmp[[TMP0:[0-9]+]]:
+; SMALL: ld 3, .LC[[LC0:[0-9]+]]@toc(2)
+; SMALL: blr
+; SMALL: .LC[[LC0]]:
+; SMALL: .tc .Ltmp[[TMP0]][TC],.Ltmp[[TMP0]]
+ ret i8* blockaddress(@test, %here)
+}
+
diff --git a/test/CodeGen/PowerPC/cc.ll b/test/CodeGen/PowerPC/cc.ll
index f92121bd7202..c23ee7c9f5c8 100644
--- a/test/CodeGen/PowerPC/cc.ll
+++ b/test/CodeGen/PowerPC/cc.ll
@@ -41,7 +41,7 @@ entry:
br label %foo
foo:
- call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc}" (i64 %a)
+ call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc},~{cr1},~{cr2},~{cr3},~{cr4},~{cr5},~{cr6},~{cr7}" (i64 %a)
br i1 %c, label %bar, label %end
bar:
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll
index 5e00675c0398..71611060ed7a 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s --check-prefix=ELF64LE
; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970
;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
@@ -9,12 +10,16 @@
define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
entry:
; ELF64: sitofp_single_i64
+; ELF64LE: sitofp_single_i64
; PPC970: sitofp_single_i64
%b.addr = alloca float, align 4
%conv = sitofp i64 %a to float
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfids
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -26,12 +31,20 @@ entry:
define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
entry:
; ELF64: sitofp_single_i32
+; ELF64LE: sitofp_single_i32
; PPC970: sitofp_single_i32
%b.addr = alloca float, align 4
%conv = sitofp i32 %a to float
; ELF64: std
+; stack offset used to load the float: 65524 = -16 + 4
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwax
; ELF64: fcfids
+; ELF64LE: std
+; stack offset used to load the float: 65520 = -16 + 0
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwax
+; ELF64LE: fcfids
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -43,6 +56,7 @@ entry:
define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
entry:
; ELF64: sitofp_single_i16
+; ELF64LE: sitofp_single_i16
; PPC970: sitofp_single_i16
%b.addr = alloca float, align 4
%conv = sitofp i16 %a to float
@@ -50,6 +64,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
+; ELF64LE: extsh
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfids
; PPC970: extsh
; PPC970: std
; PPC970: lfd
@@ -62,6 +80,7 @@ entry:
define void @sitofp_single_i8(i8 %a) nounwind ssp {
entry:
; ELF64: sitofp_single_i8
+; ELF64LE: sitofp_single_i8
; PPC970: sitofp_single_i8
%b.addr = alloca float, align 4
%conv = sitofp i8 %a to float
@@ -69,6 +88,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
+; ELF64LE: extsb
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfids
; PPC970: extsb
; PPC970: std
; PPC970: lfd
@@ -81,12 +104,20 @@ entry:
define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i32
+; ELF64LE: sitofp_double_i32
; PPC970: sitofp_double_i32
%b.addr = alloca double, align 8
%conv = sitofp i32 %a to double
; ELF64: std
+; stack offset used to load the float: 65524 = -16 + 4
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwax
; ELF64: fcfid
+; ELF64LE: std
+; stack offset used to load the float: 65520 = -16 + 0
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwax
+; ELF64LE: fcfid
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -97,12 +128,16 @@ entry:
define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i64
+; ELF64LE: sitofp_double_i64
; PPC970: sitofp_double_i64
%b.addr = alloca double, align 8
%conv = sitofp i64 %a to double
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfid
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -113,6 +148,7 @@ entry:
define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i16
+; ELF64LE: sitofp_double_i16
; PPC970: sitofp_double_i16
%b.addr = alloca double, align 8
%conv = sitofp i16 %a to double
@@ -120,6 +156,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
+; ELF64LE: extsh
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfid
; PPC970: extsh
; PPC970: std
; PPC970: lfd
@@ -131,6 +171,7 @@ entry:
define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i8
+; ELF64LE: sitofp_double_i8
; PPC970: sitofp_double_i8
%b.addr = alloca double, align 8
%conv = sitofp i8 %a to double
@@ -138,6 +179,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
+; ELF64LE: extsb
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfid
; PPC970: extsb
; PPC970: std
; PPC970: lfd
@@ -151,12 +196,16 @@ entry:
define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
entry:
; ELF64: uitofp_single_i64
+; ELF64LE: uitofp_single_i64
; PPC970: uitofp_single_i64
%b.addr = alloca float, align 4
%conv = uitofp i64 %a to float
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidus
; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
ret void
@@ -165,12 +214,20 @@ entry:
define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
entry:
; ELF64: uitofp_single_i32
+; ELF64LE: uitofp_single_i32
; PPC970: uitofp_single_i32
%b.addr = alloca float, align 4
%conv = uitofp i32 %a to float
; ELF64: std
+; stack offset used to load the float: 65524 = -16 + 4
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwzx
; ELF64: fcfidus
+; ELF64LE: std
+; stack offset used to load the float: 65520 = -16 + 0
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwzx
+; ELF64LE: fcfidus
; PPC970-NOT: lfiwzx
; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
@@ -180,6 +237,7 @@ entry:
define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
entry:
; ELF64: uitofp_single_i16
+; ELF64LE: uitofp_single_i16
; PPC970: uitofp_single_i16
%b.addr = alloca float, align 4
%conv = uitofp i16 %a to float
@@ -187,6 +245,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidus
; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
; PPC970: std
; PPC970: lfd
@@ -199,6 +261,7 @@ entry:
define void @uitofp_single_i8(i8 %a) nounwind ssp {
entry:
; ELF64: uitofp_single_i8
+; ELF64LE: uitofp_single_i8
; PPC970: uitofp_single_i8
%b.addr = alloca float, align 4
%conv = uitofp i8 %a to float
@@ -206,6 +269,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidus
; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
; PPC970: std
; PPC970: lfd
@@ -218,12 +285,16 @@ entry:
define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i64
+; ELF64LE: uitofp_double_i64
; PPC970: uitofp_double_i64
%b.addr = alloca double, align 8
%conv = uitofp i64 %a to double
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidu
; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
ret void
@@ -232,12 +303,20 @@ entry:
define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i32
+; ELF64LE: uitofp_double_i32
; PPC970: uitofp_double_i32
%b.addr = alloca double, align 8
%conv = uitofp i32 %a to double
; ELF64: std
+; stack offset used to load the float: 65524 = -16 + 4
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwzx
; ELF64: fcfidu
+; ELF64LE: std
+; stack offset used to load the float: 65520 = -16 + 0
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwzx
+; ELF64LE: fcfidu
; PPC970-NOT: lfiwzx
; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
@@ -247,6 +326,7 @@ entry:
define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i16
+; ELF64LE: uitofp_double_i16
; PPC970: uitofp_double_i16
%b.addr = alloca double, align 8
%conv = uitofp i16 %a to double
@@ -254,6 +334,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidu
; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
; PPC970: std
; PPC970: lfd
@@ -265,6 +349,7 @@ entry:
define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i8
+; ELF64LE: uitofp_double_i8
; PPC970: uitofp_double_i8
%b.addr = alloca double, align 8
%conv = uitofp i8 %a to double
@@ -272,6 +357,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidu
; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
; PPC970: std
; PPC970: lfd
@@ -285,12 +374,16 @@ entry:
define void @fptosi_float_i32(float %a) nounwind ssp {
entry:
; ELF64: fptosi_float_i32
+; ELF64LE: fptosi_float_i32
; PPC970: fptosi_float_i32
%b.addr = alloca i32, align 4
%conv = fptosi float %a to i32
; ELF64: fctiwz
; ELF64: stfd
; ELF64: lwa
+; ELF64LE: fctiwz
+; ELF64LE: stfd
+; ELF64LE: lwa
; PPC970: fctiwz
; PPC970: stfd
; PPC970: lwa
@@ -301,12 +394,16 @@ entry:
define void @fptosi_float_i64(float %a) nounwind ssp {
entry:
; ELF64: fptosi_float_i64
+; ELF64LE: fptosi_float_i64
; PPC970: fptosi_float_i64
%b.addr = alloca i64, align 4
%conv = fptosi float %a to i64
; ELF64: fctidz
; ELF64: stfd
; ELF64: ld
+; ELF64LE: fctidz
+; ELF64LE: stfd
+; ELF64LE: ld
; PPC970: fctidz
; PPC970: stfd
; PPC970: ld
@@ -317,12 +414,16 @@ entry:
define void @fptosi_double_i32(double %a) nounwind ssp {
entry:
; ELF64: fptosi_double_i32
+; ELF64LE: fptosi_double_i32
; PPC970: fptosi_double_i32
%b.addr = alloca i32, align 8
%conv = fptosi double %a to i32
; ELF64: fctiwz
; ELF64: stfd
; ELF64: lwa
+; ELF64LE: fctiwz
+; ELF64LE: stfd
+; ELF64LE: lwa
; PPC970: fctiwz
; PPC970: stfd
; PPC970: lwa
@@ -333,12 +434,16 @@ entry:
define void @fptosi_double_i64(double %a) nounwind ssp {
entry:
; ELF64: fptosi_double_i64
+; ELF64LE: fptosi_double_i64
; PPC970: fptosi_double_i64
%b.addr = alloca i64, align 8
%conv = fptosi double %a to i64
; ELF64: fctidz
; ELF64: stfd
; ELF64: ld
+; ELF64LE: fctidz
+; ELF64LE: stfd
+; ELF64LE: ld
; PPC970: fctidz
; PPC970: stfd
; PPC970: ld
@@ -351,12 +456,16 @@ entry:
define void @fptoui_float_i32(float %a) nounwind ssp {
entry:
; ELF64: fptoui_float_i32
+; ELF64LE: fptoui_float_i32
; PPC970: fptoui_float_i32
%b.addr = alloca i32, align 4
%conv = fptoui float %a to i32
; ELF64: fctiwuz
; ELF64: stfd
; ELF64: lwz
+; ELF64LE: fctiwuz
+; ELF64LE: stfd
+; ELF64LE: lwz
; PPC970: fctidz
; PPC970: stfd
; PPC970: lwz
@@ -367,12 +476,16 @@ entry:
define void @fptoui_float_i64(float %a) nounwind ssp {
entry:
; ELF64: fptoui_float_i64
+; ELF64LE: fptoui_float_i64
; PPC970: fptoui_float_i64
%b.addr = alloca i64, align 4
%conv = fptoui float %a to i64
; ELF64: fctiduz
; ELF64: stfd
; ELF64: ld
+; ELF64LE: fctiduz
+; ELF64LE: stfd
+; ELF64LE: ld
; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 4
ret void
@@ -381,12 +494,16 @@ entry:
define void @fptoui_double_i32(double %a) nounwind ssp {
entry:
; ELF64: fptoui_double_i32
+; ELF64LE: fptoui_double_i32
; PPC970: fptoui_double_i32
%b.addr = alloca i32, align 8
%conv = fptoui double %a to i32
; ELF64: fctiwuz
; ELF64: stfd
; ELF64: lwz
+; ELF64LE: fctiwuz
+; ELF64LE: stfd
+; ELF64LE: lwz
; PPC970: fctidz
; PPC970: stfd
; PPC970: lwz
@@ -397,12 +514,16 @@ entry:
define void @fptoui_double_i64(double %a) nounwind ssp {
entry:
; ELF64: fptoui_double_i64
+; ELF64LE: fptoui_double_i64
; PPC970: fptoui_double_i64
%b.addr = alloca i64, align 8
%conv = fptoui double %a to i64
; ELF64: fctiduz
; ELF64: stfd
; ELF64: ld
+; ELF64LE: fctiduz
+; ELF64LE: stfd
+; ELF64LE: ld
; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 8
ret void
diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll
index fa19f8b11fd6..f82de70c9286 100644
--- a/test/CodeGen/PowerPC/fast-isel-ret.ll
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@@ -1,8 +1,40 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+define zeroext i1 @rettrue() nounwind uwtable ssp {
+entry:
+; ELF64-LABEL: rettrue
+; ELF64: li 3, 1
+; ELF64: blr
+ ret i1 true
+}
+
+define zeroext i1 @retfalse() nounwind uwtable ssp {
+entry:
+; ELF64-LABEL: retfalse
+; ELF64: li 3, 0
+; ELF64: blr
+ ret i1 false
+}
+
+define signext i1 @retstrue() nounwind uwtable ssp {
+entry:
+; ELF64-LABEL: retstrue
+; ELF64: li 3, -1
+; ELF64: blr
+ ret i1 true
+}
+
+define signext i1 @retsfalse() nounwind uwtable ssp {
+entry:
+; ELF64-LABEL: retsfalse
+; ELF64: li 3, 0
+; ELF64: blr
+ ret i1 false
+}
+
define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret2
+; ELF64-LABEL: ret2
; ELF64: extsb
; ELF64: blr
ret i8 %a
@@ -10,7 +42,7 @@ entry:
define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret3
+; ELF64-LABEL: ret3
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
; ELF64: blr
ret i8 %a
@@ -18,7 +50,7 @@ entry:
define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret4
+; ELF64-LABEL: ret4
; ELF64: extsh
; ELF64: blr
ret i16 %a
@@ -26,7 +58,7 @@ entry:
define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret5
+; ELF64-LABEL: ret5
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
; ELF64: blr
ret i16 %a
@@ -34,7 +66,7 @@ entry:
define i16 @ret6(i16 %a) nounwind uwtable ssp {
entry:
-; ELF64: ret6
+; ELF64-LABEL: ret6
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
; ELF64: blr
ret i16 %a
@@ -42,7 +74,7 @@ entry:
define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret7
+; ELF64-LABEL: ret7
; ELF64: extsw
; ELF64: blr
ret i32 %a
@@ -50,7 +82,7 @@ entry:
define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret8
+; ELF64-LABEL: ret8
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
; ELF64: blr
ret i32 %a
@@ -58,7 +90,7 @@ entry:
define i32 @ret9(i32 %a) nounwind uwtable ssp {
entry:
-; ELF64: ret9
+; ELF64-LABEL: ret9
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
; ELF64: blr
ret i32 %a
@@ -66,7 +98,7 @@ entry:
define i64 @ret10(i64 %a) nounwind uwtable ssp {
entry:
-; ELF64: ret10
+; ELF64-LABEL: ret10
; ELF64-NOT: exts
; ELF64-NOT: rldicl
; ELF64: blr
@@ -75,21 +107,21 @@ entry:
define float @ret11(float %a) nounwind uwtable ssp {
entry:
-; ELF64: ret11
+; ELF64-LABEL: ret11
; ELF64: blr
ret float %a
}
define double @ret12(double %a) nounwind uwtable ssp {
entry:
-; ELF64: ret12
+; ELF64-LABEL: ret12
; ELF64: blr
ret double %a
}
define i8 @ret13() nounwind uwtable ssp {
entry:
-; ELF64: ret13
+; ELF64-LABEL: ret13
; ELF64: li
; ELF64: blr
ret i8 15;
@@ -97,7 +129,7 @@ entry:
define i16 @ret14() nounwind uwtable ssp {
entry:
-; ELF64: ret14
+; ELF64-LABEL: ret14
; ELF64: li
; ELF64: blr
ret i16 -225;
@@ -105,7 +137,7 @@ entry:
define i32 @ret15() nounwind uwtable ssp {
entry:
-; ELF64: ret15
+; ELF64-LABEL: ret15
; ELF64: lis
; ELF64: ori
; ELF64: blr
@@ -114,7 +146,7 @@ entry:
define i64 @ret16() nounwind uwtable ssp {
entry:
-; ELF64: ret16
+; ELF64-LABEL: ret16
; ELF64: li
; ELF64: sldi
; ELF64: oris
@@ -125,7 +157,7 @@ entry:
define float @ret17() nounwind uwtable ssp {
entry:
-; ELF64: ret17
+; ELF64-LABEL: ret17
; ELF64: addis
; ELF64: lfs
; ELF64: blr
@@ -134,7 +166,7 @@ entry:
define double @ret18() nounwind uwtable ssp {
entry:
-; ELF64: ret18
+; ELF64-LABEL: ret18
; ELF64: addis
; ELF64: lfd
; ELF64: blr
diff --git a/test/CodeGen/PowerPC/ia-mem-r0.ll b/test/CodeGen/PowerPC/ia-mem-r0.ll
new file mode 100644
index 000000000000..4ab17edc5b10
--- /dev/null
+++ b/test/CodeGen/PowerPC/ia-mem-r0.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Make sure that we don't generate a std r, 0(0) -- the memory address cannot
+; be stored in r0.
+; CHECK-LABEL: @test1
+; CHECK-NOT: std {{[0-9]+}}, 0(0)
+; CHECK: blr
+
+define void @test1({ i8*, void (i8*, i8*)* } %fn_arg) {
+ %fn = alloca { i8*, void (i8*, i8*)* }
+ %sp = alloca i8*, align 8
+ %regs = alloca [18 x i64], align 8
+ store { i8*, void (i8*, i8*)* } %fn_arg, { i8*, void (i8*, i8*)* }* %fn
+ %1 = bitcast [18 x i64]* %regs to i64*
+ call void asm sideeffect "std 14, $0", "=*m"(i64* %1)
+ %2 = bitcast [18 x i64]* %regs to i8*
+ %3 = getelementptr i8* %2, i32 8
+ %4 = bitcast i8* %3 to i64*
+ call void asm sideeffect "std 15, $0", "=*m"(i64* %4)
+ %5 = bitcast [18 x i64]* %regs to i8*
+ %6 = getelementptr i8* %5, i32 16
+ %7 = bitcast i8* %6 to i64*
+ call void asm sideeffect "std 16, $0", "=*m"(i64* %7)
+ %8 = bitcast [18 x i64]* %regs to i8*
+ %9 = getelementptr i8* %8, i32 24
+ %10 = bitcast i8* %9 to i64*
+ call void asm sideeffect "std 17, $0", "=*m"(i64* %10)
+ %11 = bitcast [18 x i64]* %regs to i8*
+ %12 = getelementptr i8* %11, i32 32
+ %13 = bitcast i8* %12 to i64*
+ call void asm sideeffect "std 18, $0", "=*m"(i64* %13)
+ %14 = bitcast [18 x i64]* %regs to i8*
+ %15 = getelementptr i8* %14, i32 40
+ %16 = bitcast i8* %15 to i64*
+ call void asm sideeffect "std 19, $0", "=*m"(i64* %16)
+ %17 = bitcast [18 x i64]* %regs to i8*
+ %18 = getelementptr i8* %17, i32 48
+ %19 = bitcast i8* %18 to i64*
+ call void asm sideeffect "std 20, $0", "=*m"(i64* %19)
+ %20 = bitcast [18 x i64]* %regs to i8*
+ %21 = getelementptr i8* %20, i32 56
+ %22 = bitcast i8* %21 to i64*
+ call void asm sideeffect "std 21, $0", "=*m"(i64* %22)
+ %23 = bitcast [18 x i64]* %regs to i8*
+ %24 = getelementptr i8* %23, i32 64
+ %25 = bitcast i8* %24 to i64*
+ call void asm sideeffect "std 22, $0", "=*m"(i64* %25)
+ %26 = bitcast [18 x i64]* %regs to i8*
+ %27 = getelementptr i8* %26, i32 72
+ %28 = bitcast i8* %27 to i64*
+ call void asm sideeffect "std 23, $0", "=*m"(i64* %28)
+ %29 = bitcast [18 x i64]* %regs to i8*
+ %30 = getelementptr i8* %29, i32 80
+ %31 = bitcast i8* %30 to i64*
+ call void asm sideeffect "std 24, $0", "=*m"(i64* %31)
+ %32 = bitcast [18 x i64]* %regs to i8*
+ %33 = getelementptr i8* %32, i32 88
+ %34 = bitcast i8* %33 to i64*
+ call void asm sideeffect "std 25, $0", "=*m"(i64* %34)
+ %35 = bitcast [18 x i64]* %regs to i8*
+ %36 = getelementptr i8* %35, i32 96
+ %37 = bitcast i8* %36 to i64*
+ call void asm sideeffect "std 26, $0", "=*m"(i64* %37)
+ %38 = bitcast [18 x i64]* %regs to i8*
+ %39 = getelementptr i8* %38, i32 104
+ %40 = bitcast i8* %39 to i64*
+ call void asm sideeffect "std 27, $0", "=*m"(i64* %40)
+ %41 = bitcast [18 x i64]* %regs to i8*
+ %42 = getelementptr i8* %41, i32 112
+ %43 = bitcast i8* %42 to i64*
+ call void asm sideeffect "std 28, $0", "=*m"(i64* %43)
+ %44 = bitcast [18 x i64]* %regs to i8*
+ %45 = getelementptr i8* %44, i32 120
+ %46 = bitcast i8* %45 to i64*
+ call void asm sideeffect "std 29, $0", "=*m"(i64* %46)
+ %47 = bitcast [18 x i64]* %regs to i8*
+ %48 = getelementptr i8* %47, i32 128
+ %49 = bitcast i8* %48 to i64*
+ call void asm sideeffect "std 30, $0", "=*m"(i64* %49)
+ %50 = bitcast [18 x i64]* %regs to i8*
+ %51 = getelementptr i8* %50, i32 136
+ %52 = bitcast i8* %51 to i64*
+ call void asm sideeffect "std 31, $0", "=*m"(i64* %52)
+ %53 = getelementptr { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 1
+ %.funcptr = load void (i8*, i8*)** %53
+ %54 = getelementptr { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 0
+ %.ptr = load i8** %54
+ %55 = load i8** %sp
+ call void %.funcptr(i8* %.ptr, i8* %55)
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/ia-neg-const.ll b/test/CodeGen/PowerPC/ia-neg-const.ll
new file mode 100644
index 000000000000..165fc1339d0b
--- /dev/null
+++ b/test/CodeGen/PowerPC/ia-neg-const.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
+
+; Function Attrs: nounwind
+define i64 @main() #0 {
+entry:
+ %x = alloca i64, align 8
+ store i64 0, i64* %x, align 8
+ %0 = call i64 asm sideeffect "ld $0,$1\0A\09add${2:I} $0,$0,$2", "=&r,*m,Ir"(i64* %x, i64 -1) #0
+ ret i64 %0
+}
+
+; CHECK: ld
+; CHECK-NOT: addi 3,3,4294967295
+; CHECK: addi 3,3,-1
+; CHECK: blr
+
+; Function Attrs: nounwind
+declare signext i32 @printf(i8* nocapture readonly, ...) #0
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/stack-realign.ll b/test/CodeGen/PowerPC/stack-realign.ll
index a59fceb5bdd0..762f50a9cbe0 100644
--- a/test/CodeGen/PowerPC/stack-realign.ll
+++ b/test/CodeGen/PowerPC/stack-realign.ll
@@ -37,6 +37,7 @@ entry:
; CHECK-DAG: subfic 0, [[REG]], -160
; CHECK: stdux 1, 1, 0
+; CHECK: .cfi_def_cfa_register r30
; CHECK: .cfi_offset r30, -16
; CHECK: .cfi_offset lr, 16
@@ -59,6 +60,7 @@ entry:
; CHECK-FP-DAG: subfic 0, [[REG]], -160
; CHECK-FP: stdux 1, 1, 0
+; CHECK-FP: .cfi_def_cfa_register r30
; CHECK-FP: .cfi_offset r31, -8
; CHECK-FP: .cfi_offset r30, -16
; CHECK-FP: .cfi_offset lr, 16
@@ -120,6 +122,8 @@ entry:
; CHECK-DAG: subfc 0, [[REG3]], [[REG2]]
; CHECK: stdux 1, 1, 0
+; CHECK: .cfi_def_cfa_register r30
+
; CHECK: blr
; CHECK-32-LABEL: @hoo
@@ -178,6 +182,8 @@ entry:
; CHECK-DAG: subfic 0, [[REG]], -192
; CHECK: stdux 1, 1, 0
+; CHECK: .cfi_def_cfa_register r30
+
; CHECK: stfd 30, -16(30)
; CHECK: blr
@@ -193,6 +199,8 @@ entry:
; CHECK-FP-DAG: subfic 0, [[REG]], -192
; CHECK-FP: stdux 1, 1, 0
+; CHECK-FP: .cfi_def_cfa_register r30
+
; CHECK-FP: stfd 30, -16(30)
; CHECK-FP: blr
diff --git a/test/CodeGen/PowerPC/subreg-postra-2.ll b/test/CodeGen/PowerPC/subreg-postra-2.ll
new file mode 100644
index 000000000000..2faaa6129294
--- /dev/null
+++ b/test/CodeGen/PowerPC/subreg-postra-2.ll
@@ -0,0 +1,175 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @jbd2_journal_commit_transaction() #0 {
+entry:
+ br i1 undef, label %do.body, label %if.then5
+
+if.then5: ; preds = %entry
+ unreachable
+
+do.body: ; preds = %entry
+ br i1 undef, label %do.body.i, label %trace_jbd2_start_commit.exit
+
+do.body.i: ; preds = %do.body
+ unreachable
+
+trace_jbd2_start_commit.exit: ; preds = %do.body
+ br i1 undef, label %do.body.i1116, label %trace_jbd2_commit_locking.exit
+
+do.body.i1116: ; preds = %trace_jbd2_start_commit.exit
+ unreachable
+
+trace_jbd2_commit_locking.exit: ; preds = %trace_jbd2_start_commit.exit
+ br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph: ; preds = %trace_jbd2_commit_locking.exit
+ unreachable
+
+while.end: ; preds = %trace_jbd2_commit_locking.exit
+ br i1 undef, label %spin_unlock.exit1146, label %if.then.i.i.i.i1144
+
+if.then.i.i.i.i1144: ; preds = %while.end
+ unreachable
+
+spin_unlock.exit1146: ; preds = %while.end
+ br i1 undef, label %spin_unlock.exit1154, label %if.then.i.i.i.i1152
+
+if.then.i.i.i.i1152: ; preds = %spin_unlock.exit1146
+ unreachable
+
+spin_unlock.exit1154: ; preds = %spin_unlock.exit1146
+ br i1 undef, label %do.body.i1159, label %trace_jbd2_commit_flushing.exit
+
+do.body.i1159: ; preds = %spin_unlock.exit1154
+ br i1 undef, label %if.end.i1166, label %do.body5.i1165
+
+do.body5.i1165: ; preds = %do.body.i1159
+ unreachable
+
+if.end.i1166: ; preds = %do.body.i1159
+ unreachable
+
+trace_jbd2_commit_flushing.exit: ; preds = %spin_unlock.exit1154
+ br i1 undef, label %for.end.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i: ; preds = %trace_jbd2_commit_flushing.exit
+ unreachable
+
+for.end.i: ; preds = %trace_jbd2_commit_flushing.exit
+ br i1 undef, label %journal_submit_data_buffers.exit, label %if.then.i.i.i.i31.i
+
+if.then.i.i.i.i31.i: ; preds = %for.end.i
+ br label %journal_submit_data_buffers.exit
+
+journal_submit_data_buffers.exit: ; preds = %if.then.i.i.i.i31.i, %for.end.i
+ br i1 undef, label %if.end103, label %if.then102
+
+if.then102: ; preds = %journal_submit_data_buffers.exit
+ unreachable
+
+if.end103: ; preds = %journal_submit_data_buffers.exit
+ br i1 undef, label %do.body.i1182, label %trace_jbd2_commit_logging.exit
+
+do.body.i1182: ; preds = %if.end103
+ br i1 undef, label %if.end.i1189, label %do.body5.i1188
+
+do.body5.i1188: ; preds = %do.body5.i1188, %do.body.i1182
+ br i1 undef, label %if.end.i1189, label %do.body5.i1188
+
+if.end.i1189: ; preds = %do.body5.i1188, %do.body.i1182
+ unreachable
+
+trace_jbd2_commit_logging.exit: ; preds = %if.end103
+ br label %while.cond129.outer1451
+
+while.cond129.outer1451: ; preds = %start_journal_io, %trace_jbd2_commit_logging.exit
+ br label %while.cond129
+
+while.cond129: ; preds = %if.then135, %while.cond129.outer1451
+ br i1 undef, label %while.end246, label %if.then135
+
+if.then135: ; preds = %while.cond129
+ br i1 undef, label %start_journal_io, label %while.cond129
+
+start_journal_io: ; preds = %if.then135
+ br label %while.cond129.outer1451
+
+while.end246: ; preds = %while.cond129
+ br i1 undef, label %for.end.i1287, label %for.body.i1277
+
+for.body.i1277: ; preds = %while.end246
+ unreachable
+
+for.end.i1287: ; preds = %while.end246
+ br i1 undef, label %journal_finish_inode_data_buffers.exit, label %if.then.i.i.i.i84.i
+
+if.then.i.i.i.i84.i: ; preds = %for.end.i1287
+ unreachable
+
+journal_finish_inode_data_buffers.exit: ; preds = %for.end.i1287
+ br i1 undef, label %if.end256, label %if.then249
+
+if.then249: ; preds = %journal_finish_inode_data_buffers.exit
+ unreachable
+
+if.end256: ; preds = %journal_finish_inode_data_buffers.exit
+ br label %while.body318
+
+while.body318: ; preds = %wait_on_buffer.exit, %if.end256
+ br i1 undef, label %wait_on_buffer.exit, label %if.then.i1296
+
+if.then.i1296: ; preds = %while.body318
+ br label %wait_on_buffer.exit
+
+wait_on_buffer.exit: ; preds = %if.then.i1296, %while.body318
+ br i1 undef, label %do.body378, label %while.body318
+
+do.body378: ; preds = %wait_on_buffer.exit
+ br i1 undef, label %while.end418, label %while.body392.lr.ph
+
+while.body392.lr.ph: ; preds = %do.body378
+ br label %while.body392
+
+while.body392: ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
+ %0 = load i8** undef, align 8
+ %add.ptr399 = getelementptr inbounds i8* %0, i64 -72
+ %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*
+ %tobool.i1316 = icmp eq i64 undef, 0
+ br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317
+
+if.then.i1317: ; preds = %while.body392
+ unreachable
+
+wait_on_buffer.exit1319: ; preds = %while.body392
+ %1 = load volatile i64* %b_state.i.i1314, align 8
+ %conv.i.i1322 = and i64 %1, 1
+ %lnot404 = icmp eq i64 %conv.i.i1322, 0
+ %.err.4 = select i1 %lnot404, i32 -5, i32 undef
+ %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #0
+ store i8* %0, i8** undef, align 8
+ %cmp.i1312 = icmp eq i32* undef, undef
+ br i1 %cmp.i1312, label %while.end418, label %while.body392
+
+while.end418: ; preds = %wait_on_buffer.exit1319, %do.body378
+ %err.4.lcssa = phi i32 [ undef, %do.body378 ], [ %.err.4, %wait_on_buffer.exit1319 ]
+ %tobool419 = icmp eq i32 %err.4.lcssa, 0
+ br i1 %tobool419, label %if.end421, label %if.then420
+
+; CHECK-LABEL: @jbd2_journal_commit_transaction
+; CHECK: andi.
+; CHECK: cror [[REG:[0-9]+]], 1, 1
+; CHECK: stdcx.
+; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]]
+
+if.then420: ; preds = %while.end418
+ unreachable
+
+if.end421: ; preds = %while.end418
+ unreachable
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/subreg-postra.ll b/test/CodeGen/PowerPC/subreg-postra.ll
new file mode 100644
index 000000000000..b10fa668cb8d
--- /dev/null
+++ b/test/CodeGen/PowerPC/subreg-postra.ll
@@ -0,0 +1,168 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @jbd2_journal_commit_transaction(i32* %journal) #0 {
+entry:
+ br i1 undef, label %do.body, label %if.then5
+
+if.then5: ; preds = %entry
+ unreachable
+
+do.body: ; preds = %entry
+ br i1 undef, label %do.body.i, label %trace_jbd2_start_commit.exit
+
+do.body.i: ; preds = %do.body
+ unreachable
+
+trace_jbd2_start_commit.exit: ; preds = %do.body
+ br i1 undef, label %do.body.i1116, label %trace_jbd2_commit_locking.exit
+
+do.body.i1116: ; preds = %trace_jbd2_start_commit.exit
+ br i1 undef, label %if.end.i1123, label %do.body5.i1122
+
+do.body5.i1122: ; preds = %do.body.i1116
+ unreachable
+
+if.end.i1123: ; preds = %do.body.i1116
+ br label %trace_jbd2_commit_locking.exit
+
+trace_jbd2_commit_locking.exit: ; preds = %if.end.i1123, %trace_jbd2_start_commit.exit
+ br i1 undef, label %spin_unlock.exit1146, label %if.then.i.i.i.i1144
+
+if.then.i.i.i.i1144: ; preds = %trace_jbd2_commit_locking.exit
+ unreachable
+
+spin_unlock.exit1146: ; preds = %trace_jbd2_commit_locking.exit
+ br i1 undef, label %spin_unlock.exit1154, label %if.then.i.i.i.i1152
+
+if.then.i.i.i.i1152: ; preds = %spin_unlock.exit1146
+ br label %spin_unlock.exit1154
+
+spin_unlock.exit1154: ; preds = %if.then.i.i.i.i1152, %spin_unlock.exit1146
+ br i1 undef, label %do.body.i1159, label %trace_jbd2_commit_flushing.exit
+
+do.body.i1159: ; preds = %spin_unlock.exit1154
+ unreachable
+
+trace_jbd2_commit_flushing.exit: ; preds = %spin_unlock.exit1154
+ br i1 undef, label %for.end.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i: ; preds = %trace_jbd2_commit_flushing.exit
+ br i1 undef, label %spin_unlock.exit.i, label %if.then.i.i.i.i.i
+
+if.then.i.i.i.i.i: ; preds = %for.body.lr.ph.i
+ unreachable
+
+spin_unlock.exit.i: ; preds = %for.body.lr.ph.i
+ unreachable
+
+for.end.i: ; preds = %trace_jbd2_commit_flushing.exit
+ br i1 undef, label %journal_submit_data_buffers.exit, label %if.then.i.i.i.i31.i
+
+if.then.i.i.i.i31.i: ; preds = %for.end.i
+ unreachable
+
+journal_submit_data_buffers.exit: ; preds = %for.end.i
+ br i1 undef, label %if.end103, label %if.then102
+
+if.then102: ; preds = %journal_submit_data_buffers.exit
+ unreachable
+
+if.end103: ; preds = %journal_submit_data_buffers.exit
+ br i1 undef, label %do.body.i1182, label %trace_jbd2_commit_logging.exit
+
+do.body.i1182: ; preds = %if.end103
+ unreachable
+
+trace_jbd2_commit_logging.exit: ; preds = %if.end103
+ br i1 undef, label %for.end.i1287, label %for.body.i1277
+
+for.body.i1277: ; preds = %trace_jbd2_commit_logging.exit
+ unreachable
+
+for.end.i1287: ; preds = %trace_jbd2_commit_logging.exit
+ br i1 undef, label %journal_finish_inode_data_buffers.exit, label %if.then.i.i.i.i84.i
+
+if.then.i.i.i.i84.i: ; preds = %for.end.i1287
+ unreachable
+
+journal_finish_inode_data_buffers.exit: ; preds = %for.end.i1287
+ br i1 undef, label %if.end256, label %if.then249
+
+if.then249: ; preds = %journal_finish_inode_data_buffers.exit
+ unreachable
+
+if.end256: ; preds = %journal_finish_inode_data_buffers.exit
+ br i1 undef, label %do.body277, label %if.then260
+
+if.then260: ; preds = %if.end256
+ br label %do.body277
+
+do.body277: ; preds = %if.then260, %if.end256
+ br label %while.body318
+
+while.body318: ; preds = %wait_on_buffer.exit, %do.body277
+ %tobool.i1295 = icmp eq i64 undef, 0
+ br i1 %tobool.i1295, label %wait_on_buffer.exit, label %if.then.i1296
+
+if.then.i1296: ; preds = %while.body318
+ unreachable
+
+wait_on_buffer.exit: ; preds = %while.body318
+ br i1 undef, label %do.body378, label %while.body318
+
+do.body378: ; preds = %wait_on_buffer.exit
+ br i1 undef, label %while.end418, label %while.body392.lr.ph
+
+while.body392.lr.ph: ; preds = %do.body378
+ br label %while.body392
+
+while.body392: ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
+ %0 = load i8** undef, align 8
+ %add.ptr399 = getelementptr inbounds i8* %0, i64 -72
+ %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*
+ %tobool.i1316 = icmp eq i64 undef, 0
+ br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317
+
+if.then.i1317: ; preds = %while.body392
+ unreachable
+
+wait_on_buffer.exit1319: ; preds = %while.body392
+ %1 = load volatile i64* %b_state.i.i1314, align 8
+ %conv.i.i1322 = and i64 %1, 1
+ %lnot404 = icmp eq i64 %conv.i.i1322, 0
+ %.err.4 = select i1 %lnot404, i32 -5, i32 undef
+ %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #1
+ %prev.i.i.i1325 = getelementptr inbounds i8* %0, i64 8
+ %3 = load i32** null, align 8
+ store i32* %3, i32** undef, align 8
+ call void @__brelse(i32* undef) #1
+ br i1 undef, label %while.end418, label %while.body392
+
+; CHECK-LABEL: @jbd2_journal_commit_transaction
+; CHECK: andi.
+; CHECK: cror [[REG:[0-9]+]], 1, 1
+; CHECK: stdcx.
+; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]]
+
+while.end418: ; preds = %wait_on_buffer.exit1319, %do.body378
+ %err.4.lcssa = phi i32 [ undef, %do.body378 ], [ %.err.4, %wait_on_buffer.exit1319 ]
+ br i1 undef, label %if.end421, label %if.then420
+
+if.then420: ; preds = %while.end418
+ call void @jbd2_journal_abort(i32* %journal, i32 signext %err.4.lcssa) #1
+ br label %if.end421
+
+if.end421: ; preds = %if.then420, %while.end418
+ unreachable
+}
+
+declare void @jbd2_journal_abort(i32*, i32 signext)
+
+declare void @__brelse(i32*)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/tls-pic.ll b/test/CodeGen/PowerPC/tls-pic.ll
index 9f3ab6e3b491..9ba372591e6e 100644
--- a/test/CodeGen/PowerPC/tls-pic.ll
+++ b/test/CodeGen/PowerPC/tls-pic.ll
@@ -1,5 +1,7 @@
; RUN: llc -march=ppc64 -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0 %s
; RUN: llc -march=ppc64 -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1 %s
+; RUN: llc -march=ppc32 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0-32 %s
+; RUN: llc -march=ppc32 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1-32 %s
target triple = "powerpc64-unknown-linux-gnu"
; Test correct assembly code generation for thread-local storage using
@@ -22,6 +24,16 @@ entry:
; OPT0-NEXT: nop
; OPT0: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
; OPT0-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l
+; OPT0-32-LABEL: main
+; OPT0-32: addi {{[0-9]+}}, {{[0-9]+}}, a@got@tlsld
+; OPT0-32: bl __tls_get_addr(a@tlsld)@PLT
+; OPT0-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha
+; OPT0-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l
+; OPT1-32-LABEL: main
+; OPT1-32: addi 3, {{[0-9]+}}, a@got@tlsld
+; OPT1-32: bl __tls_get_addr(a@tlsld)@PLT
+; OPT1-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha
+; OPT1-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l
; Test peephole optimization for thread-local storage using the
; local dynamic model.
@@ -52,4 +64,6 @@ entry:
; OPT1-NEXT: addi 3, [[REG]], a2@got@tlsgd@l
; OPT1: bl __tls_get_addr(a2@tlsgd)
; OPT1-NEXT: nop
-
+; OPT1-32-LABEL: main2
+; OPT1-32: addi 3, {{[0-9]+}}, a2@got@tlsgd
+; OPT1-32: bl __tls_get_addr(a2@tlsgd)@PLT
diff --git a/test/CodeGen/PowerPC/tls-store2.ll b/test/CodeGen/PowerPC/tls-store2.ll
new file mode 100644
index 000000000000..f884dd8a0a17
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-store2.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Test back-to-back stores of TLS variables to ensure call sequences no
+; longer overlap.
+
+@__once_callable = external thread_local global i8**
+@__once_call = external thread_local global void ()*
+
+define i64 @call_once(i64 %flag, i8* %ptr) {
+entry:
+ %var = alloca i8*, align 8
+ store i8* %ptr, i8** %var, align 8
+ store i8** %var, i8*** @__once_callable, align 8
+ store void ()* @__once_call_impl, void ()** @__once_call, align 8
+ ret i64 %flag
+}
+
+; CHECK-LABEL: call_once:
+; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha
+; CHECK: addi 3, 3, __once_callable@got@tlsgd@l
+; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
+; CHECK-NEXT: nop
+; CHECK: std {{[0-9]+}}, 0(3)
+; CHECK: addis 3, 2, __once_call@got@tlsgd@ha
+; CHECK: addi 3, 3, __once_call@got@tlsgd@l
+; CHECK: bl __tls_get_addr(__once_call@tlsgd)
+; CHECK-NEXT: nop
+; CHECK: std {{[0-9]+}}, 0(3)
+
+declare void @__once_call_impl()
diff --git a/test/MC/Disassembler/Mips/mips2.txt b/test/MC/Disassembler/Mips/mips2.txt
new file mode 100644
index 000000000000..a604055e62ef
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips2.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips2 | FileCheck %s
+
+# CHECK: sdc3 $5, 9154($6)
+0xfc 0xc5 0x23 0xc2
+
+# CHECK: swc3 $6, 9158($7)
+0xec 0xe6 0x23 0xc6
+
+# CHECK: ldc3 $7, 9162($8)
+0xdd 0x07 0x23 0xca
+
+# CHECK: lwc3 $8, 9166($9)
+0xcd 0x28 0x23 0xce
diff --git a/test/MC/Disassembler/Mips/mips32.txt b/test/MC/Disassembler/Mips/mips32.txt
index bfb145e39596..bd4ae4daad04 100644
--- a/test/MC/Disassembler/Mips/mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux | FileCheck %s
+
# CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x05
@@ -436,3 +437,15 @@
# CHECK: rdhwr $5, $29
# CHECK: .set pop
0x7c 0x05 0xe8 0x3b
+
+# CHECK: cache 1, 2($3)
+0xbc 0x61 0x00 0x02
+
+# CHECK: pref 3, 4($2)
+0xcc 0x43 0x00 0x04
+
+# CHECK: swc2 $9, 9158($7)
+0xe8 0xe9 0x23 0xc6
+
+# CHECK: lwc2 $8, 9162($6)
+0xc8 0xc8 0x23 0xca
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
index f3d2d100cae3..d494df6f9c24 100644
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64.txt
@@ -85,3 +85,9 @@
# CHECK: sdxc1 $f8, $4($25)
0x4f 0x24 0x40 0x09
+
+# CHECK: sdc2 $9, 9158($7)
+0xf8 0xe9 0x23 0xc6
+
+# CHECK: ldc2 $3, 9162($8)
+0xd9 0x03 0x23 0xca
diff --git a/test/MC/PowerPC/ppc64-localentry.s b/test/MC/PowerPC/ppc64-localentry.s
index 6d2c12072289..03f760ef86ea 100644
--- a/test/MC/PowerPC/ppc64-localentry.s
+++ b/test/MC/PowerPC/ppc64-localentry.s
@@ -35,6 +35,9 @@ caller_other:
nop
.size caller_other, .-caller_other
+copy1 = callee1
+copy2 = callee2
+
# Verify that use of .localentry implies ABI version 2
# CHECK: ElfHeader {
# CHECK: Flags [ (0x2)
@@ -68,3 +71,19 @@ caller_other:
# CHECK-NEXT: Other: 0
# CHECK-NEXT: Section: .text
+# Verify that symbol assignment copies the Other bits.
+# CHECK: Name: copy1
+# CHECK-NEXT: Value:
+# CHECK-NEXT: Size: 16
+# CHECK-NEXT: Binding: Local
+# CHECK-NEXT: Type: Function
+# CHECK-NEXT: Other: 96
+# CHECK-NEXT: Section: .text
+# CHECK: Name: copy2
+# CHECK-NEXT: Value:
+# CHECK-NEXT: Size: 8
+# CHECK-NEXT: Binding: Local
+# CHECK-NEXT: Type: Function
+# CHECK-NEXT: Other: 0
+# CHECK-NEXT: Section: .text
+
diff --git a/test/Transforms/GCOVProfiling/linezero.ll b/test/Transforms/GCOVProfiling/linezero.ll
index e2f832498428..ba1c563c64ab 100644
--- a/test/Transforms/GCOVProfiling/linezero.ll
+++ b/test/Transforms/GCOVProfiling/linezero.ll
@@ -1,4 +1,4 @@
-; RUN: sed -e 's@PATTERN@\%T@g' < %s > %t1
+; RUN: sed -e 's|PATTERN|%T|g' < %s > %t1
; RUN: opt -insert-gcov-profiling -disable-output < %t1
; RUN: rm %T/linezero.gcno %t1
; REQUIRES: shell
diff --git a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
index a8020e6014b0..e462712fe7a5 100644
--- a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
+++ b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
@@ -6,7 +6,7 @@ target triple = "thumbv7-apple-darwin"
; CHECK-LABEL: @test(
; CHECK: if.end.i126:
-; CHECK: %exitcond = icmp ne i8* %incdec.ptr.i, getelementptr (i8* null, i32 undef)
+; CHECK: %exitcond = icmp ne i8* %destYPixelPtr.010.i, getelementptr (i8* null, i32 undef)
define void @test() nounwind {
entry:
br label %while.cond
diff --git a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
index e4c31d125c60..9e55a1791341 100644
--- a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
+++ b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -11,7 +11,7 @@ entry:
br i1 %cmp1, label %for.body, label %for.end
; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8
; CHECK: for.body:
; CHECK: phi i8 addrspace(2)*
@@ -43,7 +43,7 @@ entry:
br i1 %cmp1, label %for.body, label %for.end
; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16
; CHECK: for.body:
; CHECK: phi i8 addrspace(3)*
diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
index 4736f857bcce..eeffd0f55024 100644
--- a/test/Transforms/IndVarSimplify/lftr-extend-const.ll
+++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
@@ -2,7 +2,7 @@
; CHECK-LABEL: @foo(
; CHECK-NOT: %lftr.wideiv = trunc i32 %indvars.iv.next to i16
-; CHECK: %exitcond = icmp ne i32 %indvars.iv.next, 512
+; CHECK: %exitcond = icmp ne i32 %indvars.iv, 511
define void @foo() #0 {
entry:
br label %for.body
@@ -21,7 +21,7 @@ for.end: ; preds = %for.body
; Check that post-incrementing the backedge taken count does not overflow.
; CHECK-LABEL: @postinc(
-; CHECK: icmp eq i32 %indvars.iv.next, 256
+; CHECK: icmp eq i32 %indvars.iv, 255
define i32 @postinc() #0 {
entry:
br label %do.body
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 1fdcdd1ec3a4..efb96bdbe6b5 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -82,15 +82,23 @@ exit:
; Perform LFTR without generating extra preheader code.
define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
i32 %irow, i32 %ilead) nounwind {
-; CHECK: entry:
-; CHECK-NOT: zext
-; CHECK-NOT: add
-; CHECK: loop:
-; CHECK: phi i64
-; CHECK: phi i64
+; CHECK-LABEL: @guardedloop(
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %[[cmp:.*]] = icmp slt i32 1, %irow
+; CHECK-NEXT: br i1 %[[cmp]], label %[[loop_preheader:.*]], label %[[return:.*]]
+
+; CHECK: [[loop_preheader]]:
+; CHECK-NEXT: %[[sext:.*]] = sext i32 %ilead to i64
+; CHECK-NEXT: %[[add:.*]] = add i32 %irow, -1
+; CHECK-NEXT: br label %[[loop:.*]]
+
+; CHECK: [[loop]]:
+; CHECK-NEXT: %[[indvars_iv2:.*]] = phi i64
+; CHECK-NEXT: phi i64
; CHECK-NOT: phi
-; CHECK: icmp ne
-; CHECK: br i1
+; CHECK: %[[lftr_wideiv:.*]] = trunc i64 %[[indvars_iv2]] to i32
+; CHECK-NEXT: %[[exitcond:.*]] = icmp ne i32 %[[lftr_wideiv]], %[[add]]
+; CHECK-NEXT: br i1 %[[exitcond]], label %[[loop]], label
entry:
%cmp = icmp slt i32 1, %irow
br i1 %cmp, label %loop, label %return
diff --git a/test/Transforms/IndVarSimplify/pr20680.ll b/test/Transforms/IndVarSimplify/pr20680.ll
new file mode 100644
index 000000000000..88a7fd765d0b
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr20680.ll
@@ -0,0 +1,219 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+define void @f() {
+; CHECK-LABEL: @f(
+; CHECK-LABEL: entry:
+; CHECK: br label %[[for_cond2_preheader:.*]]
+
+; CHECK: [[for_cond2_preheader]]:
+; CHECK-NEXT: %[[indvars_iv:.*]] = phi i32 [ %[[indvars_iv_next:.*]], %[[for_inc13:.*]] ], [ -14, %entry ]
+; br i1 {{.*}}, label %[[for_inc13]], label %
+entry:
+ %0 = load i32* @a, align 4
+ %tobool2 = icmp eq i32 %0, 0
+ %1 = load i32* @a, align 4
+ %tobool = icmp eq i32 %1, 0
+ br label %for.cond2.preheader
+
+for.cond2.preheader: ; preds = %for.inc13, %entry
+ %storemerge15 = phi i8 [ -14, %entry ], [ %inc14, %for.inc13 ]
+ br i1 %tobool2, label %for.inc13, label %for.body3.lr.ph
+
+for.body3.lr.ph: ; preds = %for.cond2.preheader
+ %tobool5 = icmp eq i8 %storemerge15, 0
+ %conv7 = sext i8 %storemerge15 to i32
+ %2 = add nsw i32 %conv7, 1
+ %3 = icmp ult i32 %2, 3
+ %div = select i1 %3, i32 %conv7, i32 0
+ br i1 %tobool5, label %for.body3.lr.ph.split.us, label %for.body3.lr.ph.for.body3.lr.ph.split_crit_edge
+
+for.body3.lr.ph.for.body3.lr.ph.split_crit_edge: ; preds = %for.body3.lr.ph
+ br label %for.body3.lr.ph.split
+
+for.body3.lr.ph.split.us: ; preds = %for.body3.lr.ph
+ br i1 %tobool, label %for.body3.lr.ph.split.us.split.us, label %for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge
+
+for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge: ; preds = %for.body3.lr.ph.split.us
+ br label %for.body3.lr.ph.split.us.split
+
+for.body3.lr.ph.split.us.split.us: ; preds = %for.body3.lr.ph.split.us
+ br label %for.body3.us.us
+
+for.body3.us.us: ; preds = %for.cond2.loopexit.us.us, %for.body3.lr.ph.split.us.split.us
+ br i1 true, label %cond.false.us.us, label %cond.end.us.us
+
+cond.false.us.us: ; preds = %for.body3.us.us
+ br label %cond.end.us.us
+
+cond.end.us.us: ; preds = %cond.false.us.us, %for.body3.us.us
+ %cond.us.us = phi i32 [ %div, %cond.false.us.us ], [ %conv7, %for.body3.us.us ]
+ %4 = load i32* @b, align 4
+ %cmp91.us.us = icmp slt i32 %4, 1
+ br i1 %cmp91.us.us, label %for.inc.lr.ph.us.us, label %for.cond2.loopexit.us.us
+
+for.cond2.loopexit.us.us: ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us.us, %cond.end.us.us
+ br i1 true, label %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us, label %for.body3.us.us
+
+for.inc.lr.ph.us.us: ; preds = %cond.end.us.us
+ br label %for.inc.us.us
+
+for.cond8.for.cond2.loopexit_crit_edge.us.us: ; preds = %for.inc.us.us
+ %inc.lcssa.us.us = phi i32 [ %inc.us.us, %for.inc.us.us ]
+ store i32 %inc.lcssa.us.us, i32* @b, align 4
+ br label %for.cond2.loopexit.us.us
+
+for.inc.us.us: ; preds = %for.inc.us.us, %for.inc.lr.ph.us.us
+ %5 = phi i32 [ %4, %for.inc.lr.ph.us.us ], [ %inc.us.us, %for.inc.us.us ]
+ %inc.us.us = add nsw i32 %5, 1
+ %cmp9.us.us = icmp slt i32 %inc.us.us, 1
+ br i1 %cmp9.us.us, label %for.inc.us.us, label %for.cond8.for.cond2.loopexit_crit_edge.us.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us: ; preds = %for.cond2.loopexit.us.us
+ %cond.lcssa.ph.us.ph.us = phi i32 [ %cond.us.us, %for.cond2.loopexit.us.us ]
+ br label %for.cond2.for.inc13_crit_edge.us-lcssa.us
+
+for.body3.lr.ph.split.us.split: ; preds = %for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge
+ br label %for.body3.us
+
+for.body3.us: ; preds = %for.cond2.loopexit.us, %for.body3.lr.ph.split.us.split
+ br i1 true, label %cond.false.us, label %cond.end.us
+
+cond.false.us: ; preds = %for.body3.us
+ br label %cond.end.us
+
+cond.end.us: ; preds = %cond.false.us, %for.body3.us
+ %cond.us = phi i32 [ %div, %cond.false.us ], [ %conv7, %for.body3.us ]
+ %6 = load i32* @b, align 4
+ %cmp91.us = icmp slt i32 %6, 1
+ br i1 %cmp91.us, label %for.inc.lr.ph.us, label %for.cond2.loopexit.us
+
+for.inc.us: ; preds = %for.inc.lr.ph.us, %for.inc.us
+ %7 = phi i32 [ %6, %for.inc.lr.ph.us ], [ %inc.us, %for.inc.us ]
+ %inc.us = add nsw i32 %7, 1
+ %cmp9.us = icmp slt i32 %inc.us, 1
+ br i1 %cmp9.us, label %for.inc.us, label %for.cond8.for.cond2.loopexit_crit_edge.us
+
+for.cond2.loopexit.us: ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us, %cond.end.us
+ br i1 false, label %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa, label %for.body3.us
+
+for.inc.lr.ph.us: ; preds = %cond.end.us
+ br label %for.inc.us
+
+for.cond8.for.cond2.loopexit_crit_edge.us: ; preds = %for.inc.us
+ %inc.lcssa.us = phi i32 [ %inc.us, %for.inc.us ]
+ store i32 %inc.lcssa.us, i32* @b, align 4
+ br label %for.cond2.loopexit.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa: ; preds = %for.cond2.loopexit.us
+ %cond.lcssa.ph.us.ph = phi i32 [ %cond.us, %for.cond2.loopexit.us ]
+ br label %for.cond2.for.inc13_crit_edge.us-lcssa.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us: ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us
+ %cond.lcssa.ph.us = phi i32 [ %cond.lcssa.ph.us.ph, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa ], [ %cond.lcssa.ph.us.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us ]
+ br label %for.cond2.for.inc13_crit_edge
+
+for.body3.lr.ph.split: ; preds = %for.body3.lr.ph.for.body3.lr.ph.split_crit_edge
+ br i1 %tobool, label %for.body3.lr.ph.split.split.us, label %for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge
+
+for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge: ; preds = %for.body3.lr.ph.split
+ br label %for.body3.lr.ph.split.split
+
+for.body3.lr.ph.split.split.us: ; preds = %for.body3.lr.ph.split
+ br label %for.body3.us3
+
+for.body3.us3: ; preds = %for.cond2.loopexit.us11, %for.body3.lr.ph.split.split.us
+ br i1 false, label %cond.false.us4, label %cond.end.us5
+
+cond.false.us4: ; preds = %for.body3.us3
+ br label %cond.end.us5
+
+cond.end.us5: ; preds = %cond.false.us4, %for.body3.us3
+ %cond.us6 = phi i32 [ %div, %cond.false.us4 ], [ %conv7, %for.body3.us3 ]
+ %8 = load i32* @b, align 4
+ %cmp91.us7 = icmp slt i32 %8, 1
+ br i1 %cmp91.us7, label %for.inc.lr.ph.us12, label %for.cond2.loopexit.us11
+
+for.inc.us8: ; preds = %for.inc.lr.ph.us12, %for.inc.us8
+ %9 = phi i32 [ %8, %for.inc.lr.ph.us12 ], [ %inc.us9, %for.inc.us8 ]
+ %inc.us9 = add nsw i32 %9, 1
+ %cmp9.us10 = icmp slt i32 %inc.us9, 1
+ br i1 %cmp9.us10, label %for.inc.us8, label %for.cond8.for.cond2.loopexit_crit_edge.us13
+
+for.cond2.loopexit.us11: ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us13, %cond.end.us5
+ br i1 true, label %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us, label %for.body3.us3
+
+for.inc.lr.ph.us12: ; preds = %cond.end.us5
+ br label %for.inc.us8
+
+for.cond8.for.cond2.loopexit_crit_edge.us13: ; preds = %for.inc.us8
+ %inc.lcssa.us14 = phi i32 [ %inc.us9, %for.inc.us8 ]
+ store i32 %inc.lcssa.us14, i32* @b, align 4
+ br label %for.cond2.loopexit.us11
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us: ; preds = %for.cond2.loopexit.us11
+ %cond.lcssa.ph.ph.us = phi i32 [ %cond.us6, %for.cond2.loopexit.us11 ]
+ br label %for.cond2.for.inc13_crit_edge.us-lcssa
+
+for.body3.lr.ph.split.split: ; preds = %for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge
+ br label %for.body3
+
+for.cond8.for.cond2.loopexit_crit_edge: ; preds = %for.inc
+ %inc.lcssa = phi i32 [ %inc, %for.inc ]
+ store i32 %inc.lcssa, i32* @b, align 4
+ br label %for.cond2.loopexit
+
+for.cond2.loopexit: ; preds = %cond.end, %for.cond8.for.cond2.loopexit_crit_edge
+ br i1 false, label %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa, label %for.body3
+
+for.body3: ; preds = %for.cond2.loopexit, %for.body3.lr.ph.split.split
+ br i1 false, label %cond.false, label %cond.end
+
+cond.false: ; preds = %for.body3
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %for.body3
+ %cond = phi i32 [ %div, %cond.false ], [ %conv7, %for.body3 ]
+ %10 = load i32* @b, align 4
+ %cmp91 = icmp slt i32 %10, 1
+ br i1 %cmp91, label %for.inc.lr.ph, label %for.cond2.loopexit
+
+for.inc.lr.ph: ; preds = %cond.end
+ br label %for.inc
+
+for.inc: ; preds = %for.inc, %for.inc.lr.ph
+ %11 = phi i32 [ %10, %for.inc.lr.ph ], [ %inc, %for.inc ]
+ %inc = add nsw i32 %11, 1
+ %cmp9 = icmp slt i32 %inc, 1
+ br i1 %cmp9, label %for.inc, label %for.cond8.for.cond2.loopexit_crit_edge
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa: ; preds = %for.cond2.loopexit
+ %cond.lcssa.ph.ph = phi i32 [ %cond, %for.cond2.loopexit ]
+ br label %for.cond2.for.inc13_crit_edge.us-lcssa
+
+for.cond2.for.inc13_crit_edge.us-lcssa: ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us
+ %cond.lcssa.ph = phi i32 [ %cond.lcssa.ph.ph, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa ], [ %cond.lcssa.ph.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us ]
+ br label %for.cond2.for.inc13_crit_edge
+
+for.cond2.for.inc13_crit_edge: ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us
+ %cond.lcssa = phi i32 [ %cond.lcssa.ph, %for.cond2.for.inc13_crit_edge.us-lcssa ], [ %cond.lcssa.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us ]
+ store i32 %cond.lcssa, i32* @c, align 4
+ br label %for.inc13
+
+; CHECK: [[for_inc13]]:
+; CHECK-NEXT: %[[indvars_iv_next]] = add nuw nsw i32 %[[indvars_iv]], 1
+; CHECK-NEXT: %[[exitcond4:.*]] = icmp ne i32 %[[indvars_iv]], -1
+; CHECK-NEXT: br i1 %[[exitcond4]], label %[[for_cond2_preheader]], label %[[for_end15:.*]]
+for.inc13: ; preds = %for.cond2.for.inc13_crit_edge, %for.cond2.preheader
+ %inc14 = add i8 %storemerge15, 1
+ %cmp = icmp ugt i8 %inc14, 50
+ br i1 %cmp, label %for.cond2.preheader, label %for.end15
+
+; CHECK: [[for_end15]]:
+; CHECK-NEXT: ret void
+for.end15: ; preds = %for.inc13
+ ret void
+}
diff --git a/test/Transforms/LICM/PR21582.ll b/test/Transforms/LICM/PR21582.ll
new file mode 100644
index 000000000000..c068c2f8e32b
--- /dev/null
+++ b/test/Transforms/LICM/PR21582.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+@b = external global i32, align 4
+@fn3.i = external global i32, align 4
+
+declare i32 @g() nounwind
+
+define i32 @f() {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.end, %entry
+; CHECK-LABEL: for.cond:
+; CHECK: store i32 0, i32* @b
+ store i32 0, i32* @b, align 4
+ br i1 true, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %for.cond
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %g.15 = phi i32 [ undef, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx2 = getelementptr inbounds i32* @fn3.i, i64 0
+ %0 = load i32* %arrayidx2, align 4
+ %call = call i32 @g()
+ br i1 false, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %for.cond
+ %whatever = phi i32 [ %call, %for.end.loopexit ], [ undef, %for.cond ]
+ br i1 false, label %for.cond, label %if.then
+
+if.then: ; preds = %for.end
+; CHECK-LABEL: if.then:
+; CHECK: phi i32 [ {{.*}}, %for.end ]
+; CHECK-NOT: store i32 0, i32* @b
+; CHECK: ret i32
+ ret i32 %whatever
+}
diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll
index 4244f157d9f8..69266693c479 100644
--- a/test/Transforms/LICM/speculate.ll
+++ b/test/Transforms/LICM/speculate.ll
@@ -3,12 +3,11 @@
; UDiv is safe to speculate if the denominator is known non-zero.
; CHECK-LABEL: @safe_udiv(
-; CHECK: %div = udiv i64 %x, %or
+; CHECK: %div = udiv i64 %x, 2
; CHECK-NEXT: br label %for.body
define void @safe_udiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
entry:
- %or = or i64 %m, 1
br label %for.body
for.body: ; preds = %entry, %for.inc
@@ -19,7 +18,7 @@ for.body: ; preds = %entry, %for.inc
br i1 %tobool, label %for.inc, label %if.then
if.then: ; preds = %for.body
- %div = udiv i64 %x, %or
+ %div = udiv i64 %x, 2
%arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
store i64 %div, i64* %arrayidx1, align 8
br label %for.inc
@@ -69,13 +68,12 @@ for.end: ; preds = %for.inc, %entry
; known to have at least one zero bit.
; CHECK-LABEL: @safe_sdiv(
-; CHECK: %div = sdiv i64 %x, %or
+; CHECK: %div = sdiv i64 %x, 2
; CHECK-NEXT: br label %for.body
define void @safe_sdiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
entry:
%and = and i64 %m, -3
- %or = or i64 %and, 1
br label %for.body
for.body: ; preds = %entry, %for.inc
@@ -86,7 +84,7 @@ for.body: ; preds = %entry, %for.inc
br i1 %tobool, label %for.inc, label %if.then
if.then: ; preds = %for.body
- %div = sdiv i64 %x, %or
+ %div = sdiv i64 %x, 2
%arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
store i64 %div, i64* %arrayidx1, align 8
br label %for.inc
diff --git a/test/Transforms/LoopVectorize/incorrect-dom-info.ll b/test/Transforms/LoopVectorize/incorrect-dom-info.ll
new file mode 100644
index 000000000000..624ee7e50597
--- /dev/null
+++ b/test/Transforms/LoopVectorize/incorrect-dom-info.ll
@@ -0,0 +1,142 @@
+; This test is based on one of benchmarks from SPEC2006. It exposes a bug with
+; incorrect updating of the dom-tree.
+; RUN: opt < %s -loop-vectorize -verify-dom-info
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@PL_utf8skip = external constant [0 x i8]
+
+; Function Attrs: nounwind ssp uwtable
+define void @Perl_pp_quotemeta() #0 {
+ %len = alloca i64, align 8
+ br i1 undef, label %2, label %1
+
+; <label>:1 ; preds = %0
+ br label %3
+
+; <label>:2 ; preds = %0
+ br label %3
+
+; <label>:3 ; preds = %2, %1
+ br i1 undef, label %34, label %4
+
+; <label>:4 ; preds = %3
+ br i1 undef, label %5, label %6
+
+; <label>:5 ; preds = %4
+ br label %6
+
+; <label>:6 ; preds = %5, %4
+ br i1 undef, label %7, label %8
+
+; <label>:7 ; preds = %6
+ br label %8
+
+; <label>:8 ; preds = %7, %6
+ br i1 undef, label %.preheader, label %9
+
+.preheader: ; preds = %9, %8
+ br i1 undef, label %.loopexit, label %.lr.ph
+
+; <label>:9 ; preds = %8
+ br i1 undef, label %thread-pre-split.preheader, label %.preheader
+
+thread-pre-split.preheader: ; preds = %9
+ br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21
+
+.thread-pre-split.loopexit_crit_edge: ; preds = %19
+ %scevgep.sum = xor i64 %umax, -1
+ %scevgep45 = getelementptr i8* %d.020, i64 %scevgep.sum
+ br label %thread-pre-split.loopexit
+
+thread-pre-split.loopexit: ; preds = %11, %.thread-pre-split.loopexit_crit_edge
+ %d.1.lcssa = phi i8* [ %scevgep45, %.thread-pre-split.loopexit_crit_edge ], [ %d.020, %11 ]
+ br i1 false, label %thread-pre-split._crit_edge, label %.lr.ph21
+
+.lr.ph21: ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader
+ %d.020 = phi i8* [ undef, %26 ], [ %d.1.lcssa, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ]
+ %10 = phi i64 [ %28, %26 ], [ undef, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ]
+ br i1 undef, label %11, label %22
+
+; <label>:11 ; preds = %.lr.ph21
+ %12 = getelementptr inbounds [0 x i8]* @PL_utf8skip, i64 0, i64 undef
+ %13 = load i8* %12, align 1
+ %14 = zext i8 %13 to i64
+ %15 = icmp ugt i64 %14, %10
+ %. = select i1 %15, i64 %10, i64 %14
+ br i1 undef, label %thread-pre-split.loopexit, label %.lr.ph28
+
+.lr.ph28: ; preds = %11
+ %16 = xor i64 %10, -1
+ %17 = xor i64 %14, -1
+ %18 = icmp ugt i64 %16, %17
+ %umax = select i1 %18, i64 %16, i64 %17
+ br label %19
+
+; <label>:19 ; preds = %19, %.lr.ph28
+ %ulen.126 = phi i64 [ %., %.lr.ph28 ], [ %20, %19 ]
+ %20 = add i64 %ulen.126, -1
+ %21 = icmp eq i64 %20, 0
+ br i1 %21, label %.thread-pre-split.loopexit_crit_edge, label %19
+
+; <label>:22 ; preds = %.lr.ph21
+ br i1 undef, label %26, label %23
+
+; <label>:23 ; preds = %22
+ br i1 undef, label %26, label %24
+
+; <label>:24 ; preds = %23
+ br i1 undef, label %26, label %25
+
+; <label>:25 ; preds = %24
+ br label %26
+
+; <label>:26 ; preds = %25, %24, %23, %22
+ %27 = load i64* %len, align 8
+ %28 = add i64 %27, -1
+ br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21
+
+thread-pre-split._crit_edge: ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader
+ br label %.loopexit
+
+.lr.ph: ; preds = %33, %.preheader
+ br i1 undef, label %29, label %thread-pre-split5
+
+; <label>:29 ; preds = %.lr.ph
+ br i1 undef, label %33, label %30
+
+; <label>:30 ; preds = %29
+ br i1 undef, label %33, label %31
+
+thread-pre-split5: ; preds = %.lr.ph
+ br i1 undef, label %33, label %31
+
+; <label>:31 ; preds = %thread-pre-split5, %30
+ br i1 undef, label %33, label %32
+
+; <label>:32 ; preds = %31
+ br label %33
+
+; <label>:33 ; preds = %32, %31, %thread-pre-split5, %30, %29
+ br i1 undef, label %.loopexit, label %.lr.ph
+
+.loopexit: ; preds = %33, %thread-pre-split._crit_edge, %.preheader
+ br label %35
+
+; <label>:34 ; preds = %3
+ br label %35
+
+; <label>:35 ; preds = %34, %.loopexit
+ br i1 undef, label %37, label %36
+
+; <label>:36 ; preds = %35
+ br label %37
+
+; <label>:37 ; preds = %36, %35
+ ret void
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.6.0 "}
diff --git a/test/Transforms/LoopVectorize/loop-form.ll b/test/Transforms/LoopVectorize/loop-form.ll
new file mode 100644
index 000000000000..138df1d96138
--- /dev/null
+++ b/test/Transforms/LoopVectorize/loop-form.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that we vectorize only bottom-tested loops.
+; This is a reduced testcase from PR21302.
+;
+; rdar://problem/18886083
+
+%struct.X = type { i32, i16 }
+; CHECK-LABEL: @foo(
+; CHECK-NOT: vector.body
+
+define void @foo(i32 %n) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i, %n
+ br i1 %cmp, label %for.body, label %if.end
+
+for.body:
+ %iprom = sext i32 %i to i64
+ %b = getelementptr inbounds %struct.X* undef, i64 %iprom, i32 1
+ store i16 0, i16* %b, align 4
+ %inc = add nsw i32 %i, 1
+ br label %for.cond
+
+if.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
index 5bf7020a475a..848ffc428c5f 100644
--- a/test/Transforms/LoopVectorize/runtime-check-address-space.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
@@ -31,25 +31,23 @@ define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
%0 = load i32 addrspace(1)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -60,25 +58,23 @@ define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
%0 = load i32 addrspace(1)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
store i32 %mul, i32* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -89,25 +85,23 @@ define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
%0 = load i32* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -119,25 +113,23 @@ define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
-
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
%0 = load i32 addrspace(1)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
store i32 %mul, i32* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -149,25 +141,23 @@ define void @arst0(i32* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
-
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
%0 = load i32* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -180,25 +170,23 @@ define void @arst1(i32* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
-
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
%0 = load i32 addrspace(1)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32* %b, i64 %idxprom1
store i32 %mul, i32* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -210,25 +198,23 @@ define void @aoeu(i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
%0 = load i32 addrspace(2)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
index 212b37cceab3..4e98ec504f5b 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
@@ -8,26 +8,24 @@ define void @add_ints_1_1_1(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addr
; CHECK-LABEL: @add_ints_1_1_1(
; CHECK: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
%0 = load i32 addrspace(1)* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01
%1 = load i32 addrspace(1)* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01
store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
@@ -35,26 +33,24 @@ define void @add_ints_as_1_0_0(i32 addrspace(1)* %a, i32* %b, i32* %c) #0 {
; CHECK-LABEL: @add_ints_as_1_0_0(
; CHECK-NOT: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %i.01
%0 = load i32* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01
%1 = load i32* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01
store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
@@ -62,26 +58,24 @@ define void @add_ints_as_0_1_0(i32* %a, i32 addrspace(1)* %b, i32* %c) #0 {
; CHECK-LABEL: @add_ints_as_0_1_0(
; CHECK-NOT: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
%0 = load i32 addrspace(1)* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01
%1 = load i32* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
store i32 %add, i32* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
@@ -89,26 +83,24 @@ define void @add_ints_as_0_1_1(i32* %a, i32 addrspace(1)* %b, i32 addrspace(1)*
; CHECK-LABEL: @add_ints_as_0_1_1(
; CHECK-NOT: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
%0 = load i32 addrspace(1)* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01
%1 = load i32 addrspace(1)* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
store i32 %add, i32* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
@@ -116,26 +108,24 @@ define void @add_ints_as_0_1_2(i32* %a, i32 addrspace(1)* %b, i32 addrspace(2)*
; CHECK-LABEL: @add_ints_as_0_1_2(
; CHECK-NOT: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
%0 = load i32 addrspace(1)* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.01
%1 = load i32 addrspace(2)* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
store i32 %add, i32* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
diff --git a/test/Transforms/LoopVectorize/unsized-pointee-crash.ll b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll
new file mode 100644
index 000000000000..5cc98378b952
--- /dev/null
+++ b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @fn1
+define void @fn1() {
+entry:
+ br label %for.body
+
+for.body:
+ %b.05 = phi i32 (...)* [ undef, %entry ], [ %1, %for.body ]
+ %a.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = bitcast i32 (...)* %b.05 to i8*
+ %add.ptr = getelementptr i8* %0, i64 1
+ %1 = bitcast i8* %add.ptr to i32 (...)*
+; CHECK: %[[cst:.*]] = bitcast i32 (...)* {{.*}} to i8*
+; CHECK-NEXT: %[[gep:.*]] = getelementptr i8* %[[cst]], i64 1
+ %inc = add nsw i32 %a.04, 1
+ %exitcond = icmp eq i32 %a.04, 63
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/vect.stats.ll b/test/Transforms/LoopVectorize/vect.stats.ll
index 92ec24f726ee..9adf2bb6cf14 100644
--- a/test/Transforms/LoopVectorize/vect.stats.ll
+++ b/test/Transforms/LoopVectorize/vect.stats.ll
@@ -13,53 +13,47 @@ target triple = "x86_64-unknown-linux-gnu"
define void @vectorized(float* nocapture %a, i64 %size) {
entry:
- %cmp1 = icmp sgt i64 %size, 0
- br i1 %cmp1, label %for.header, label %for.end
+ %cmp1 = icmp sle i64 %size, 0
+ %cmp21 = icmp sgt i64 0, %size
+ %or.cond = or i1 %cmp1, %cmp21
+ br i1 %or.cond, label %for.end, label %for.body
-for.header:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %cmp2 = icmp sgt i64 %indvars.iv, %size
- br i1 %cmp2, label %for.end, label %for.body
-
-for.body:
-
- %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
+for.body: ; preds = %entry, %for.body
+ %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv2
%0 = load float* %arrayidx, align 4
%mul = fmul float %0, %0
store float %mul, float* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
+ %cmp2 = icmp sgt i64 %indvars.iv.next, %size
+ br i1 %cmp2, label %for.end, label %for.body
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- br label %for.header
-
-for.end:
+for.end: ; preds = %entry, %for.body
ret void
}
define void @not_vectorized(float* nocapture %a, i64 %size) {
entry:
- %cmp1 = icmp sgt i64 %size, 0
- br i1 %cmp1, label %for.header, label %for.end
+ %cmp1 = icmp sle i64 %size, 0
+ %cmp21 = icmp sgt i64 0, %size
+ %or.cond = or i1 %cmp1, %cmp21
+ br i1 %or.cond, label %for.end, label %for.body
-for.header:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %cmp2 = icmp sgt i64 %indvars.iv, %size
- br i1 %cmp2, label %for.end, label %for.body
-
-for.body:
-
- %0 = add nsw i64 %indvars.iv, -5
+for.body: ; preds = %entry, %for.body
+ %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %0 = add nsw i64 %indvars.iv2, -5
%arrayidx = getelementptr inbounds float* %a, i64 %0
%1 = load float* %arrayidx, align 4
- %2 = add nsw i64 %indvars.iv, 2
+ %2 = add nsw i64 %indvars.iv2, 2
%arrayidx2 = getelementptr inbounds float* %a, i64 %2
%3 = load float* %arrayidx2, align 4
%mul = fmul float %1, %3
- %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv2
store float %mul, float* %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
+ %cmp2 = icmp sgt i64 %indvars.iv.next, %size
+ br i1 %cmp2, label %for.end, label %for.body
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- br label %for.header
-
-for.end:
+for.end: ; preds = %entry, %for.body
ret void
} \ No newline at end of file
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index 8d82964dcbd1..946bc40b0587 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -501,3 +501,37 @@ end:
; CHECK-NOT: load
; CHECK: ret float %[[phi]]
}
+
+%struct.S = type { i32 }
+
+; Verifies we fixed PR20822. We have a foldable PHI feeding a speculatable PHI
+; which requires the rewriting of the speculated PHI to handle insertion
+; when the incoming pointer is itself from a PHI node. We would previously
+; insert a bitcast instruction *before* a PHI, producing an invalid module;
+; make sure we insert *after* the first non-PHI instruction.
+define void @PR20822() {
+; CHECK-LABEL: @PR20822(
+entry:
+ %f = alloca %struct.S, align 4
+; CHECK: %[[alloca:.*]] = alloca
+; CHECK: %[[cast:.*]] = bitcast i32* %[[alloca]] to %struct.S*
+ br i1 undef, label %if.end, label %for.cond
+
+for.cond: ; preds = %for.cond, %entry
+ br label %if.end
+
+if.end: ; preds = %for.cond, %entry
+ %f2 = phi %struct.S* [ %f, %entry ], [ %f, %for.cond ]
+; CHECK: phi {{.*}} %[[cast]]
+; CHECK: phi i32
+ phi i32 [ undef, %entry ], [ undef, %for.cond ]
+ br i1 undef, label %if.then5, label %if.then2
+
+if.then2: ; preds = %if.end
+ br label %if.then5
+
+if.then5: ; preds = %if.then2, %if.end
+ %f1 = phi %struct.S* [ undef, %if.then2 ], [ %f2, %if.end ]
+ store %struct.S undef, %struct.S* %f1, align 4
+ ret void
+}