diff options
Diffstat (limited to 'test/CodeGen')
42 files changed, 2492 insertions, 307 deletions
diff --git a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll new file mode 100644 index 000000000000..6d2564ba1ab6 --- /dev/null +++ b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=thumbv7-eabi -o - %s | FileCheck %s + +declare cc 10 void @g() + +define cc 10 void @test_direct_tail() { +; CHECK-LABEL: test_direct_tail: +; CHECK: b g + + tail call cc10 void @g() + ret void +} + +@ind_func = global void()* zeroinitializer + +define cc 10 void @test_indirect_tail() { +; CHECK-LABEL: test_indirect_tail: +; CHECK: bx {{r[0-9]+}} + %func = load void()** @ind_func + tail call cc10 void()* %func() + ret void +} diff --git a/test/CodeGen/Mips/abicalls.ll b/test/CodeGen/Mips/abicalls.ll index 6fa33aa158ad..7edc3e25c352 100644 --- a/test/CodeGen/Mips/abicalls.ll +++ b/test/CodeGen/Mips/abicalls.ll @@ -1,16 +1,11 @@ -; -; When the assembler is ready a .s file for it will -; be created. +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=STATIC %s +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s +; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s +; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s -; Note that EF_MIPS_CPIC is set by -mabicalls which is the default on Linux -; TODO need to support -mno-abicalls +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr noabicalls -relocation-model=static %s -o - | FileCheck -implicit-check-not='.abicalls' -implicit-check-not='pic0' %s -; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck 
-check-prefix=CHECK-STATIC %s -; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=CHECK-PIC %s -; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s -; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s +; ABICALLS: .abicalls -; CHECK-STATIC: .abicalls -; CHECK-STATIC-NEXT: pic0 -; CHECK-PIC: .abicalls -; CHECK-PIC-NOT: pic0 +; STATIC: pic0 +; PIC-NOT: pic0 diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll index f4118ecec79d..78fd8296178e 100644 --- a/test/CodeGen/Mips/atomic.ll +++ b/test/CodeGen/Mips/atomic.ll @@ -12,7 +12,7 @@ @x = common global i32 0, align 4 -define i32 @AtomicLoadAdd32(i32 %incr) nounwind { +define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind { entry: %0 = atomicrmw add i32* @x, i32 %incr monotonic ret i32 %0 @@ -29,7 +29,7 @@ entry: ; ALL: beqz $[[R2]], $[[BB0]] } -define i32 @AtomicLoadNand32(i32 %incr) nounwind { +define i32 @AtomicLoadNand32(i32 signext %incr) nounwind { entry: %0 = atomicrmw nand i32* @x, i32 %incr monotonic ret i32 %0 @@ -47,7 +47,7 @@ entry: ; ALL: beqz $[[R2]], $[[BB0]] } -define i32 @AtomicSwap32(i32 %newval) nounwind { +define i32 @AtomicSwap32(i32 signext %newval) nounwind { entry: %newval.addr = alloca i32, align 4 store i32 %newval, i32* %newval.addr, align 4 @@ -66,7 +66,7 @@ entry: ; ALL: beqz $[[R2]], $[[BB0]] } -define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind { +define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { entry: %newval.addr = alloca i32, align 4 store i32 %newval, i32* %newval.addr, align 4 @@ -293,7 +293,7 @@ entry: ; HAS-SEB-SEH: seb $2, $[[R17]] } -define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 %oldval, i8 signext %newval) nounwind { +define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext 
%newval) nounwind { entry: %0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic %1 = extractvalue { i8, i1 } %0, 1 @@ -381,7 +381,7 @@ entry: @countsint = common global i32 0, align 4 -define i32 @CheckSync(i32 %v) nounwind noinline { +define i32 @CheckSync(i32 signext %v) nounwind noinline { entry: %0 = atomicrmw add i32* @countsint, i32 %v seq_cst ret i32 %0 @@ -415,7 +415,7 @@ entry: ; Check that MIPS32R6 has the correct offset range. ; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store. -define i32 @AtomicLoadAdd32_OffGt9Bit(i32 %incr) nounwind { +define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind { entry: %0 = atomicrmw add i32* getelementptr(i32* @x, i32 256), i32 %incr monotonic ret i32 %0 diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll index 812eef137773..f182e65b0266 100644 --- a/test/CodeGen/Mips/bswap.ll +++ b/test/CodeGen/Mips/bswap.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64 ; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 | FileCheck %s -check-prefix=MIPS16 -define i32 @bswap32(i32 %x) nounwind readnone { +define i32 @bswap32(i32 signext %x) nounwind readnone { entry: ; MIPS32-LABEL: bswap32: ; MIPS32: wsbh $[[R0:[0-9]+]] @@ -29,7 +29,7 @@ entry: ret i32 %or.3 } -define i64 @bswap64(i64 %x) nounwind readnone { +define i64 @bswap64(i64 signext %x) nounwind readnone { entry: ; MIPS32-LABEL: bswap64: ; MIPS32: wsbh $[[R0:[0-9]+]] @@ -72,24 +72,24 @@ entry: define <4 x i32> @bswapv4i32(<4 x i32> %x) nounwind readnone { entry: ; MIPS32-LABEL: bswapv4i32: -; MIPS32: wsbh $[[R0:[0-9]+]] -; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16 -; MIPS32: wsbh $[[R0:[0-9]+]] -; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16 -; MIPS32: wsbh $[[R0:[0-9]+]] -; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16 -; MIPS32: wsbh $[[R0:[0-9]+]] -; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16 +; MIPS32-DAG: wsbh $[[R0:[0-9]+]] +; MIPS32-DAG: 
rotr ${{[0-9]+}}, $[[R0]], 16 +; MIPS32-DAG: wsbh $[[R0:[0-9]+]] +; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16 +; MIPS32-DAG: wsbh $[[R0:[0-9]+]] +; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16 +; MIPS32-DAG: wsbh $[[R0:[0-9]+]] +; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16 ; MIPS64-LABEL: bswapv4i32: -; MIPS64: wsbh $[[R0:[0-9]+]] -; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16 -; MIPS64: wsbh $[[R0:[0-9]+]] -; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16 -; MIPS64: wsbh $[[R0:[0-9]+]] -; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16 -; MIPS64: wsbh $[[R0:[0-9]+]] -; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16 +; MIPS64-DAG: wsbh $[[R0:[0-9]+]] +; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16 +; MIPS64-DAG: wsbh $[[R0:[0-9]+]] +; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16 +; MIPS64-DAG: wsbh $[[R0:[0-9]+]] +; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16 +; MIPS64-DAG: wsbh $[[R0:[0-9]+]] +; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16 ; Don't bother with a MIPS16 version. It's just bswap32 repeated four times and ; would be very long diff --git a/test/CodeGen/Mips/cconv/arguments-float.ll b/test/CodeGen/Mips/cconv/arguments-float.ll index e2119ec08028..14a3baa7f539 100644 --- a/test/CodeGen/Mips/cconv/arguments-float.ll +++ b/test/CodeGen/Mips/cconv/arguments-float.ll @@ -69,26 +69,26 @@ entry: ; O32-DAG: sw [[R4]], 28([[R2]]) ; NEW-DAG: sd $6, 24([[R2]]) -; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp) -; O32-DAG: lw [[R4:\$[0-9]+]], 36($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp) ; O32-DAG: sw [[R3]], 32([[R2]]) ; O32-DAG: sw [[R4]], 36([[R2]]) ; NEW-DAG: sd $7, 32([[R2]]) -; O32-DAG: lw [[R3:\$[0-9]+]], 40($sp) -; O32-DAG: lw [[R4:\$[0-9]+]], 44($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp) ; O32-DAG: sw [[R3]], 40([[R2]]) ; O32-DAG: sw [[R4]], 44([[R2]]) ; NEW-DAG: sd $8, 40([[R2]]) -; O32-DAG: lw [[R3:\$[0-9]+]], 48($sp) -; O32-DAG: lw [[R4:\$[0-9]+]], 52($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp) +; 
O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp) ; O32-DAG: sw [[R3]], 48([[R2]]) ; O32-DAG: sw [[R4]], 52([[R2]]) ; NEW-DAG: sd $9, 48([[R2]]) -; O32-DAG: lw [[R3:\$[0-9]+]], 56($sp) -; O32-DAG: lw [[R4:\$[0-9]+]], 60($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 56($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 60($sp) ; O32-DAG: sw [[R3]], 56([[R2]]) ; O32-DAG: sw [[R4]], 60([[R2]]) ; NEW-DAG: sd $10, 56([[R2]]) @@ -135,8 +135,8 @@ entry: ; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(floats)( ; The first four arguments are the same in O32/N32/N64. -; The first argument isn't floating point so floating point registers are not -; used. +; The first argument is floating point but soft-float is enabled so floating +; point registers are not used. ; MD00305 and GCC disagree on this one. MD00305 says that floats are treated ; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte ; aligned and occupying one slot. We'll use GCC's definition. @@ -195,7 +195,7 @@ entry: ; O32-DAG: sw $7, 12([[R2]]) ; NEW-DAG: sd $5, 8([[R2]]) -define void @float_arg2(i8 %a, float %b) nounwind { +define void @float_arg2(i8 signext %a, float %b) nounwind { entry: %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1 store volatile i8 %a, i8* %0 diff --git a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll index aadf7d18c17d..70ccf14c5450 100644 --- a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll +++ b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll @@ -4,11 +4,11 @@ ; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s ; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s -; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 
--check-prefix=NEW %s -; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s +; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWBE %s +; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWLE %s -; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s -; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s +; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWBE %s +; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWLE %s ; Test the effect of varargs on floating point types in the non-variable part ; of the argument list as specified by section 2 of the MIPSpro N32 Handbook. @@ -34,6 +34,7 @@ entry: %b = va_arg i8** %ap, double %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2 store volatile double %b, double* %1 + call void @llvm.va_end(i8* %ap2) ret void } @@ -98,6 +99,7 @@ entry: %b = va_arg i8** %ap, float %1 = getelementptr [11 x float]* @floats, i32 0, i32 2 store volatile float %b, float* %1 + call void @llvm.va_end(i8* %ap2) ret void } @@ -140,16 +142,18 @@ entry: ; Increment the pointer then get the varargs arg ; LLVM will rebind the load to the stack pointer instead of the varargs pointer ; during lowering. 
This is fine and doesn't change the behaviour. -; N32/N64 is using ori instead of addiu/daddiu but (although odd) this is fine -; since the stack is always aligned. +; Also, in big-endian mode the offset must be increased by 4 to retrieve the +; correct half of the argument slot. +; ; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 4 ; O32-DAG: sw [[VAPTR]], 4($sp) -; N32-DAG: ori [[VAPTR]], [[VAPTR]], 4 +; N32-DAG: addiu [[VAPTR]], [[VAPTR]], 8 ; N32-DAG: sw [[VAPTR]], 4($sp) -; N64-DAG: ori [[VAPTR]], [[VAPTR]], 4 +; N64-DAG: daddiu [[VAPTR]], [[VAPTR]], 8 ; N64-DAG: sd [[VAPTR]], 0($sp) ; O32-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp) -; NEW-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp) +; NEWLE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp) +; NEWBE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp) ; ALL-DAG: swc1 [[FTMP1]], 8([[R2]]) declare void @llvm.va_start(i8*) diff --git a/test/CodeGen/Mips/cconv/arguments-struct.ll b/test/CodeGen/Mips/cconv/arguments-struct.ll new file mode 100644 index 000000000000..c1bc84ee7a04 --- /dev/null +++ b/test/CodeGen/Mips/cconv/arguments-struct.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=mips-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s +; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s + +; RUN-TODO: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s +; RUN-TODO: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s + +; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-BE %s +; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 
< %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-LE %s + +; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-BE %s +; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-LE %s + +; Test small structures for all ABI's and byte orders. +; +; N32/N64 are identical in this area so their checks have been combined into +; the 'NEW' prefix (the N stands for New). + +@bytes = global [2 x i8] zeroinitializer + +define void @s_i8(i8 inreg %a) nounwind { +entry: + store i8 %a, i8* getelementptr inbounds ([2 x i8]* @bytes, i32 0, i32 1) + ret void +} + +; ALL-LABEL: s_i8: + +; SYM32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(bytes) +; SYM32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(bytes) + +; SYM64-DAG: ld [[PTR:\$[0-9]+]], %got_disp(bytes)( + +; O32-BE-DAG: srl [[ARG:\$[0-9]+]], $4, 24 +; O32-BE-DAG: sb [[ARG]], 1([[PTR]]) + +; O32-LE-DAG: sb $4, 1([[PTR]]) + +; NEW-BE-DAG: dsrl [[ARG:\$[0-9]+]], $4, 56 +; NEW-BE-DAG: sb [[ARG]], 1([[PTR]]) + +; NEW-LE-DAG: sb $4, 1([[PTR]]) diff --git a/test/CodeGen/Mips/cconv/arguments-varargs.ll b/test/CodeGen/Mips/cconv/arguments-varargs.ll new file mode 100644 index 000000000000..adacda5bc420 --- /dev/null +++ b/test/CodeGen/Mips/cconv/arguments-varargs.ll @@ -0,0 +1,1104 @@ +; RUN: llc -mtriple=mips-linux -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s +; RUN: llc -mtriple=mipsel-linux -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s + +; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s +; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL 
--check-prefix=O32 %s + +; RUN: llc -mtriple=mips64-linux -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-BE %s +; RUN: llc -mtriple=mips64el-linux -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-LE %s + +; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-BE %s +; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-LE %s + +@hwords = global [3 x i16] zeroinitializer, align 1 +@words = global [3 x i32] zeroinitializer, align 1 +@dwords = global [3 x i64] zeroinitializer, align 1 + +define void @fn_i16_dotdotdot_i16(i16 %a, ...) { +entry: +; ALL-LABEL: fn_i16_dotdotdot_i16: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)( + +; ALL-DAG: sh [[ARG1]], 2([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. 
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sh [[ARG2]], 4([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i16 + %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1 + store volatile i16 %arg1, i16* %e1, align 2 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i16 + %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2 + store volatile i16 %arg2, i16* %e2, align 2 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i16_dotdotdot_i32(i16 %a, ...) { +entry: +; ALL-LABEL: fn_i16_dotdotdot_i32: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)( + +; ALL-DAG: sw [[ARG1]], 4([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. 
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sw [[ARG2]], 8([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i32 + %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1 + store volatile i32 %arg1, i32* %e1, align 4 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i32 + %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2 + store volatile i32 %arg2, i32* %e2, align 4 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i16_dotdotdot_i64(i16 %a, ...) { +entry: +; ALL-LABEL: fn_i16_dotdotdot_i64: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] (and realign pointer for O32) +; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion and copy it to the global. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 8([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 12([[GV]]) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)( +; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-DAG: sd [[ARG1]], 8([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; FIXME: We're still aligned from the last one but CodeGen doesn't spot that. 
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion and copy it to the global. +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 16([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 20([[GV]]) + +; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-DAG: sd [[ARG2]], 16([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i64 + %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1 + store volatile i64 %arg1, i64* %e1, align 8 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i64 + %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2 + store volatile i64 %arg2, i64* %e2, align 8 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i32_dotdotdot_i16(i32 %a, ...) { +entry: +; ALL-LABEL: fn_i32_dotdotdot_i16: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). 
+; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. +; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. 
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)( + +; ALL-DAG: sh [[ARG1]], 2([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sh [[ARG2]], 4([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i16 + %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1 + store volatile i16 %arg1, i16* %e1, align 2 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i16 + %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2 + store volatile i16 %arg2, i16* %e2, align 2 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i32_dotdotdot_i32(i32 %a, ...) { +entry: +; ALL-LABEL: fn_i32_dotdotdot_i32: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). 
+; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. +; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. 
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)( + +; ALL-DAG: sw [[ARG1]], 4([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sw [[ARG2]], 8([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i32 + %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1 + store volatile i32 %arg1, i32* %e1, align 4 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i32 + %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2 + store volatile i32 %arg2, i32* %e2, align 4 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i32_dotdotdot_i64(i32 %a, ...) { +entry: +; ALL-LABEL: fn_i32_dotdotdot_i64: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). 
+; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. +; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] (and realign pointer for O32) +; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion and copy it to the global. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. 
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 8([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 12([[GV]]) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)( +; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-DAG: sd [[ARG1]], 8([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; FIXME: We're still aligned from the last one but CodeGen doesn't spot that. +; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion and copy it to the global. 
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 16([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 20([[GV]]) + +; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-DAG: sd [[ARG2]], 16([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i64 + %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1 + store volatile i64 %arg1, i64* %e1, align 8 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i64 + %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2 + store volatile i64 %arg2, i64* %e2, align 8 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i64_dotdotdot_i16(i64 %a, ...) { +entry: +; ALL-LABEL: fn_i64_dotdotdot_i16: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 16 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the +; first fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)( + +; ALL-DAG: sh [[ARG1]], 2([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. 
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sh [[ARG2]], 4([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i16 + %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1 + store volatile i16 %arg1, i16* %e1, align 2 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i16 + %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2 + store volatile i16 %arg2, i16* %e2, align 2 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i64_dotdotdot_i32(i64 %a, ...) { +entry: +; ALL-LABEL: fn_i64_dotdotdot_i32: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 16 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the +; first fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)( + +; ALL-DAG: sw [[ARG1]], 4([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. 
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sw [[ARG2]], 8([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i32 + %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1 + store volatile i32 %arg1, i32* %e1, align 4 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i32 + %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2 + store volatile i32 %arg2, i32* %e2, align 4 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i64_dotdotdot_i64(i64 %a, ...) { +entry: +; ALL-LABEL: fn_i64_dotdotdot_i64: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 16 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the +; first fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] (and realign pointer for O32) +; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion and copy it to the global. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 8([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 12([[GV]]) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)( +; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-DAG: sd [[ARG1]], 8([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; FIXME: We're still aligned from the last one but CodeGen doesn't spot that. 
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion and copy it to the global. +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 16([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 20([[GV]]) + +; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-DAG: sd [[ARG2]], 16([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i64 + %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1 + store volatile i64 %arg1, i64* %e1, align 8 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i64 + %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2 + store volatile i64 %arg2, i64* %e2, align 8 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) diff --git a/test/CodeGen/Mips/cconv/arguments.ll b/test/CodeGen/Mips/cconv/arguments.ll index 8fe29f3c8ce7..43da6044408b 100644 --- a/test/CodeGen/Mips/cconv/arguments.ll +++ b/test/CodeGen/Mips/cconv/arguments.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s -; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL 
--check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s +; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s +; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s ; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s ; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s @@ -23,8 +23,10 @@ @floats = global [11 x float] zeroinitializer @doubles = global [11 x double] zeroinitializer -define void @align_to_arg_slots(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, - i8 %h, i8 %i, i8 %j) nounwind { +define void @align_to_arg_slots(i8 signext %a, i8 signext %b, i8 signext %c, + i8 signext %d, i8 signext %e, i8 signext %f, + i8 signext %g, i8 signext %h, i8 signext %i, + i8 signext %j) nounwind { entry: %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1 store volatile i8 %a, i8* %0 @@ -53,7 +55,7 @@ entry: ; We won't test the way the global address is calculated in this test. This is ; just to get the register number for the other checks. ; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes) -; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)( +; SYM64-DAG: ld [[R1:\$[0-9]+]], %got_disp(bytes)( ; The first four arguments are the same in O32/N32/N64 ; ALL-DAG: sb $4, 1([[R1]]) @@ -82,15 +84,16 @@ entry: ; increase by 4 for O32 and 8 for N32/N64. 
; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp) ; O32-DAG: sb [[R3]], 9([[R1]]) -; NEW-DAG: lw [[R3:\$[0-9]+]], 0($sp) +; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp) ; NEW-DAG: sb [[R3]], 9([[R1]]) ; O32-DAG: lw [[R3:\$[0-9]+]], 36($sp) ; O32-DAG: sb [[R3]], 10([[R1]]) -; NEW-DAG: lw [[R3:\$[0-9]+]], 8($sp) +; NEW-DAG: ld [[R3:\$[0-9]+]], 8($sp) ; NEW-DAG: sb [[R3]], 10([[R1]]) -define void @slot_skipping(i8 %a, i64 %b, i8 %c, i8 %d, - i8 %e, i8 %f, i8 %g, i64 %i, i8 %j) nounwind { +define void @slot_skipping(i8 signext %a, i64 signext %b, i8 signext %c, + i8 signext %d, i8 signext %e, i8 signext %f, + i8 signext %g, i64 signext %i, i8 signext %j) nounwind { entry: %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1 store volatile i8 %a, i8* %0 @@ -117,9 +120,9 @@ entry: ; We won't test the way the global address is calculated in this test. This is ; just to get the register number for the other checks. ; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes) -; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)( +; SYM64-DAG: ld [[R1:\$[0-9]+]], %got_disp(bytes)( ; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) -; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(dwords)( +; SYM64-DAG: ld [[R2:\$[0-9]+]], %got_disp(dwords)( ; The first argument is the same in O32/N32/N64. ; ALL-DAG: sb $4, 1([[R1]]) @@ -137,8 +140,7 @@ entry: ; It's not clear why O32 uses lbu for this argument, but it's not wrong so we'll ; accept it for now. The only IR difference is that this argument has ; anyext from i8 and align 8 on it. -; O32LE-DAG: lbu [[R3:\$[0-9]+]], 16($sp) -; O32BE-DAG: lbu [[R3:\$[0-9]+]], 19($sp) +; O32-DAG: lw [[R3:\$[0-9]+]], 16($sp) ; O32-DAG: sb [[R3]], 2([[R1]]) ; NEW-DAG: sb $6, 2([[R1]]) ; O32-DAG: lw [[R3:\$[0-9]+]], 20($sp) @@ -166,5 +168,5 @@ entry: ; increase by 4 for O32 and 8 for N32/N64. 
; O32-DAG: lw [[R3:\$[0-9]+]], 48($sp) ; O32-DAG: sb [[R3]], 7([[R1]]) -; NEW-DAG: lw [[R3:\$[0-9]+]], 0($sp) +; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp) ; NEW-DAG: sb [[R3]], 7([[R1]]) diff --git a/test/CodeGen/Mips/cconv/return-float.ll b/test/CodeGen/Mips/cconv/return-float.ll index 28cf83d3efcf..d1a5e4f2fa9d 100644 --- a/test/CodeGen/Mips/cconv/return-float.ll +++ b/test/CodeGen/Mips/cconv/return-float.ll @@ -30,7 +30,7 @@ entry: ; O32-DAG: lw $2, %lo(float)([[R1]]) ; N32-DAG: lui [[R1:\$[0-9]+]], %hi(float) ; N32-DAG: lw $2, %lo(float)([[R1]]) -; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)($1) +; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)( ; N64-DAG: lw $2, 0([[R1]]) define double @retdouble() nounwind { @@ -44,5 +44,5 @@ entry: ; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], %lo(double) ; O32-DAG: lw $3, 4([[R2]]) ; N32-DAG: ld $2, %lo(double)([[R1:\$[0-9]+]]) -; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)($1) +; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)( ; N64-DAG: ld $2, 0([[R1]]) diff --git a/test/CodeGen/Mips/cconv/return-hard-float.ll b/test/CodeGen/Mips/cconv/return-hard-float.ll index 3eb26fa9d24f..123b499185a9 100644 --- a/test/CodeGen/Mips/cconv/return-hard-float.ll +++ b/test/CodeGen/Mips/cconv/return-hard-float.ll @@ -33,7 +33,7 @@ entry: ; O32-DAG: lwc1 $f0, %lo(float)([[R1]]) ; N32-DAG: lui [[R1:\$[0-9]+]], %hi(float) ; N32-DAG: lwc1 $f0, %lo(float)([[R1]]) -; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)($1) +; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)( ; N64-DAG: lwc1 $f0, 0([[R1]]) define double @retdouble() nounwind { @@ -45,7 +45,7 @@ entry: ; ALL-LABEL: retdouble: ; O32-DAG: ldc1 $f0, %lo(double)([[R1:\$[0-9]+]]) ; N32-DAG: ldc1 $f0, %lo(double)([[R1:\$[0-9]+]]) -; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)($1) +; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)( ; N64-DAG: ldc1 $f0, 0([[R1]]) define { double, double } @retComplexDouble() #0 { diff --git a/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll 
b/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll new file mode 100644 index 000000000000..2e8447710281 --- /dev/null +++ b/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll @@ -0,0 +1,36 @@ +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s +; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s +; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s + +; Test return of {fp128} agrees with de-facto N32/N64 ABI. + +@struct_fp128 = global {fp128} zeroinitializer + +define inreg {fp128} @ret_struct_fp128() nounwind { +entry: + %0 = load volatile {fp128}* @struct_fp128 + ret {fp128} %0 +} + +; ALL-LABEL: ret_struct_fp128: + +; O32 generates different IR so we don't test it here. It returns the struct +; indirectly. + +; Contrary to the N32/N64 ABI documentation, a struct containing a long double +; is returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to +; match the de facto ABI as implemented by GCC. 
+; N32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_fp128) +; N32-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]]) +; N32-DAG: dmtc1 [[R2]], $f0 +; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128) +; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]]) +; N32-DAG: dmtc1 [[R4]], $f1 + +; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_fp128)($1) +; N64-DAG: ld [[R2:\$[0-9]+]], 0([[R1]]) +; N64-DAG: dmtc1 [[R2]], $f0 +; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R1]]) +; N64-DAG: dmtc1 [[R4]], $f1 diff --git a/test/CodeGen/Mips/cconv/return-struct.ll b/test/CodeGen/Mips/cconv/return-struct.ll new file mode 100644 index 000000000000..11a8cf032148 --- /dev/null +++ b/test/CodeGen/Mips/cconv/return-struct.ll @@ -0,0 +1,232 @@ +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s +; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s + +; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s +; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-BE %s +; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-LE %s + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-BE %s +; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-LE %s + +; Test struct returns for all ABI's and byte orders. 
+ +@struct_byte = global {i8} zeroinitializer +@struct_2byte = global {i8,i8} zeroinitializer +@struct_3xi16 = global {[3 x i16]} zeroinitializer +@struct_6xi32 = global {[6 x i32]} zeroinitializer +@struct_128xi16 = global {[128 x i16]} zeroinitializer + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) + +define inreg {i8} @ret_struct_i8() nounwind { +entry: + %0 = load volatile {i8}* @struct_byte + ret {i8} %0 +} + +; ALL-LABEL: ret_struct_i8: +; O32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_byte) +; O32-DAG: lbu $2, %lo(struct_byte)([[R1]]) + +; N32-LE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_byte) +; N32-LE-DAG: lb $2, %lo(struct_byte)([[R1]]) + +; N32-BE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_byte) +; N32-BE-DAG: lb [[R2:\$[0-9]+]], %lo(struct_byte)([[R1]]) +; N32-BE-DAG: dsll $2, [[R2]], 56 + +; N64-LE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_byte)($1) +; N64-LE-DAG: lb $2, 0([[R1]]) + +; N64-BE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_byte)($1) +; N64-BE-DAG: lb [[R2:\$[0-9]+]], 0([[R1]]) +; N64-BE-DAG: dsll $2, [[R2]], 56 + +; This test is based on the way clang currently lowers {i8,i8} to {i16}. +; FIXME: It should probably work for without any lowering too but this doesn't +; work as expected. Each member gets mapped to a register rather than +; packed into a single register. 
+define inreg {i16} @ret_struct_i16() nounwind { +entry: + %retval = alloca {i8,i8}, align 1 + %0 = bitcast {i8,i8}* %retval to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i32 1, i1 false) + %1 = bitcast {i8,i8}* %retval to {i16}* + %2 = load volatile {i16}* %1 + ret {i16} %2 +} + +; ALL-LABEL: ret_struct_i16: +; O32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_2byte) +; O32-DAG: lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]]) +; O32-DAG: sh [[R2]], 0([[SP:\$sp]]) +; O32-DAG: lhu $2, 0([[SP:\$sp]]) + +; N32-LE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_2byte) +; N32-LE-DAG: lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]]) +; N32-LE-DAG: sh [[R2]], 8([[SP:\$sp]]) +; N32-LE-DAG: lh $2, 8([[SP:\$sp]]) + +; N32-BE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_2byte) +; N32-BE-DAG: lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]]) +; N32-BE-DAG: sh [[R2]], 8([[SP:\$sp]]) +; N32-BE-DAG: lh [[R3:\$[0-9]+]], 8([[SP:\$sp]]) +; N32-BE-DAG: dsll $2, [[R3]], 48 + +; N64-LE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1) +; N64-LE-DAG: lhu [[R2:\$[0-9]+]], 0([[R1]]) +; N64-LE-DAG: sh [[R2]], 8([[SP:\$sp]]) +; N64-LE-DAG: lh $2, 8([[SP:\$sp]]) + +; N64-BE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1) +; N64-BE-DAG: lhu [[R2:\$[0-9]+]], 0([[R1]]) +; N64-BE-DAG: sh [[R2]], 8([[SP:\$sp]]) +; N64-BE-DAG: lh [[R3:\$[0-9]+]], 8([[SP:\$sp]]) +; N64-BE-DAG: dsll $2, [[R3]], 48 + +; Ensure that structures bigger than 32-bits but smaller than 64-bits are +; also returned in the upper bits on big endian targets. Previously, these were +; missed by the CCPromoteToType and the shift didn't happen. 
+define inreg {i48} @ret_struct_3xi16() nounwind { +entry: + %0 = load volatile i48* bitcast ({[3 x i16]}* @struct_3xi16 to i48*), align 2 + %1 = insertvalue {i48} undef, i48 %0, 0 + ret {i48} %1 +} + +; ALL-LABEL: ret_struct_3xi16: + +; O32-BE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16) +; O32-BE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16) +; O32-BE-DAG: lhu [[R1:\$[0-9]+]], 4([[PTR_LO]]) +; O32-BE-DAG: lw [[R2:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]]) +; O32-BE-DAG: sll [[R3:\$[0-9]+]], [[R2]], 16 +; O32-BE-DAG: or $3, [[R1]], [[R3]] +; O32-BE-DAG: srl $2, [[R2]], 16 + +; O32-LE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16) +; O32-LE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16) +; O32-LE-DAG: lhu $3, 4([[PTR_LO]]) +; O32-LE-DAG: lw $2, %lo(struct_3xi16)([[PTR_HI]]) + +; N32-LE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16) +; N32-LE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16) +; N32-LE-DAG: lh [[R1:\$[0-9]+]], 4([[PTR_LO]]) +; N32-LE-DAG: lwu [[R2:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]]) +; N32-LE-DAG: dsll [[R3:\$[0-9]+]], [[R1]], 32 +; N32-LE-DAG: or $2, [[R2]], [[R3]] + +; N32-BE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16) +; N32-BE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16) +; N32-BE-DAG: lw [[R1:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]]) +; N32-BE-DAG: dsll [[R2:\$[0-9]+]], [[R1]], 16 +; N32-BE-DAG: lhu [[R3:\$[0-9]+]], 4([[PTR_LO]]) +; N32-BE-DAG: or [[R4:\$[0-9]+]], [[R3]], [[R2]] +; N32-BE-DAG: dsll $2, [[R4]], 16 + +; N64-LE-DAG: ld [[PTR:\$[0-9]+]], %got_disp(struct_3xi16)($1) +; N64-LE-DAG: lh [[R1:\$[0-9]+]], 4([[PTR]]) +; N64-LE-DAG: lwu [[R2:\$[0-9]+]], 0([[PTR]]) +; N64-LE-DAG: dsll [[R3:\$[0-9]+]], [[R1]], 32 +; N64-LE-DAG: or $2, [[R2]], [[R3]] + +; N64-BE-DAG: ld [[PTR:\$[0-9]+]], %got_disp(struct_3xi16)($1) +; N64-BE-DAG: lw [[R1:\$[0-9]+]], 0([[PTR]]) +; N64-BE-DAG: dsll [[R2:\$[0-9]+]], [[R1]], 16 +; N64-BE-DAG: lhu [[R3:\$[0-9]+]], 4([[PTR]]) +; N64-BE-DAG: or 
[[R4:\$[0-9]+]], [[R3]], [[R2]] +; N64-BE-DAG: dsll $2, [[R4]], 16 + +; Ensure that large structures (>128-bit) are returned indirectly. +; We pick an extremely large structure so we don't have to match inlined memcpy's. +define void @ret_struct_128xi16({[128 x i16]}* sret %returnval) { +entry: + %0 = bitcast {[128 x i16]}* %returnval to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ({[128 x i16]}* @struct_128xi16 to i8*), i64 256, i32 2, i1 false) + ret void +} + +; ALL-LABEL: ret_struct_128xi16: + +; sret pointer is already in $4 +; O32-DAG: lui [[PTR:\$[0-9]+]], %hi(struct_128xi16) +; O32-DAG: addiu $5, [[PTR]], %lo(struct_128xi16) +; O32: jal memcpy + +; sret pointer is already in $4 +; N32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_128xi16) +; N32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_128xi16) +; FIXME: This signext isn't necessary. Like integers, pointers are +; sign-extended, but unlike integers, pointers cannot have the signext attribute. +; N32-DAG: sll $5, [[PTR]], 0 +; N32: jal memcpy + +; sret pointer is already in $4 +; N64-DAG: ld $5, %got_disp(struct_128xi16)( +; N64-DAG: ld $25, %call16(memcpy)( +; N64: jalr $25 + +; Ensure that large structures (>128-bit) are returned indirectly. +; This will generate inlined memcpy's anyway so pick the smallest large +; structure +; This time we let the backend lower the sret argument. 
+define {[6 x i32]} @ret_struct_6xi32() { +entry: + %0 = load volatile {[6 x i32]}* @struct_6xi32, align 2 + ret {[6 x i32]} %0 +} + +; ALL-LABEL: ret_struct_6xi32: + +; sret pointer is already in $4 +; O32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_6xi32) +; O32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_6xi32) +; O32-DAG: lw [[T0:\$[0-9]+]], %lo(struct_6xi32)([[PTR]]) +; O32-DAG: lw [[T1:\$[0-9]+]], 4([[PTR]]) +; O32-DAG: lw [[T2:\$[0-9]+]], 8([[PTR]]) +; O32-DAG: lw [[T3:\$[0-9]+]], 12([[PTR]]) +; O32-DAG: lw [[T4:\$[0-9]+]], 16([[PTR]]) +; O32-DAG: lw [[T5:\$[0-9]+]], 20([[PTR]]) +; O32-DAG: sw [[T0]], 0($4) +; O32-DAG: sw [[T1]], 4($4) +; O32-DAG: sw [[T2]], 8($4) +; O32-DAG: sw [[T3]], 12($4) +; O32-DAG: sw [[T4]], 16($4) +; O32-DAG: sw [[T5]], 20($4) + +; FIXME: This signext isn't necessary. Like integers, pointers are +; already sign-extended, but unlike integers, pointers cannot have the signext attribute. +; In this case we don't have anywhere to put the signext either since +; the sret argument is invented by the backend. 
+; N32-DAG: sll [[RET_PTR:\$[0-9]+]], $4, 0 +; N32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_6xi32) +; N32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_6xi32) +; N32-DAG: lw [[T0:\$[0-9]+]], %lo(struct_6xi32)([[PTR]]) +; N32-DAG: lw [[T1:\$[0-9]+]], 4([[PTR]]) +; N32-DAG: lw [[T2:\$[0-9]+]], 8([[PTR]]) +; N32-DAG: lw [[T3:\$[0-9]+]], 12([[PTR]]) +; N32-DAG: lw [[T4:\$[0-9]+]], 16([[PTR]]) +; N32-DAG: lw [[T5:\$[0-9]+]], 20([[PTR]]) +; N32-DAG: sw [[T0]], 0([[RET_PTR]]) +; N32-DAG: sw [[T1]], 4([[RET_PTR]]) +; N32-DAG: sw [[T2]], 8([[RET_PTR]]) +; N32-DAG: sw [[T3]], 12([[RET_PTR]]) +; N32-DAG: sw [[T4]], 16([[RET_PTR]]) +; N32-DAG: sw [[T5]], 20([[RET_PTR]]) + +; sret pointer is already in $4 +; N64-DAG: ld [[PTR:\$[0-9]+]], %got_disp(struct_6xi32)( +; N64-DAG: lw [[T0:\$[0-9]+]], 0([[PTR]]) +; N64-DAG: lw [[T1:\$[0-9]+]], 4([[PTR]]) +; N64-DAG: lw [[T2:\$[0-9]+]], 8([[PTR]]) +; N64-DAG: lw [[T3:\$[0-9]+]], 12([[PTR]]) +; N64-DAG: lw [[T4:\$[0-9]+]], 16([[PTR]]) +; N64-DAG: lw [[T5:\$[0-9]+]], 20([[PTR]]) +; N64-DAG: sw [[T0]], 0($4) +; N64-DAG: sw [[T1]], 4($4) +; N64-DAG: sw [[T2]], 8($4) +; N64-DAG: sw [[T3]], 12($4) +; N64-DAG: sw [[T4]], 16($4) +; N64-DAG: sw [[T5]], 20($4) diff --git a/test/CodeGen/Mips/cconv/return.ll b/test/CodeGen/Mips/cconv/return.ll index 76ce5e44c4ae..63f9b5f45a18 100644 --- a/test/CodeGen/Mips/cconv/return.ll +++ b/test/CodeGen/Mips/cconv/return.ll @@ -33,7 +33,7 @@ entry: ; O32-DAG: lbu $2, %lo(byte)([[R1]]) ; N32-DAG: lui [[R1:\$[0-9]+]], %hi(byte) ; N32-DAG: lbu $2, %lo(byte)([[R1]]) -; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(byte)($1) +; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(byte)( ; N64-DAG: lbu $2, 0([[R1]]) define i32 @reti32() nounwind { @@ -47,7 +47,7 @@ entry: ; O32-DAG: lw $2, %lo(word)([[R1]]) ; N32-DAG: lui [[R1:\$[0-9]+]], %hi(word) ; N32-DAG: lw $2, %lo(word)([[R1]]) -; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(word)($1) +; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(word)( ; N64-DAG: lw $2, 0([[R1]]) define i64 @reti64() 
nounwind { diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll index 0c13fb1adfbe..e548049ab346 100755 --- a/test/CodeGen/Mips/cmov.ll +++ b/test/CodeGen/Mips/cmov.ll @@ -38,7 +38,7 @@ ; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]] ; 64-CMP-DAG: ld $2, 0($[[T2]]) -define i32* @cmov1(i32 %s) nounwind readonly { +define i32* @cmov1(i32 signext %s) nounwind readonly { entry: %tobool = icmp ne i32 %s, 0 %tmp1 = load i32** @i3, align 4 @@ -78,7 +78,7 @@ entry: ; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]] ; 64-CMP-DAG: lw $2, 0($[[T2]]) -define i32 @cmov2(i32 %s) nounwind readonly { +define i32 @cmov2(i32 signext %s) nounwind readonly { entry: %tobool = icmp ne i32 %s, 0 %tmp1 = load i32* @c, align 4 @@ -109,7 +109,7 @@ entry: ; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $6, $[[CC]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone { +define i32 @cmov3(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone { entry: %cmp = icmp eq i32 %a, 234 %cond = select i1 %cmp, i32 %b, i32 %c @@ -142,7 +142,7 @@ entry: ; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $6, $[[CC]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i32 @cmov3_ne(i32 %a, i32 %b, i32 %c) nounwind readnone { +define i32 @cmov3_ne(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone { entry: %cmp = icmp ne i32 %a, 234 %cond = select i1 %cmp, i32 %b, i32 %c @@ -179,7 +179,7 @@ entry: ; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $6, $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone { +define i64 @cmov4(i32 signext %a, i64 %b, i64 %c) nounwind readnone { entry: %cmp = icmp eq i32 %a, 234 %cond = select i1 %cmp, i64 %b, i64 %c @@ -220,7 +220,7 @@ entry: ; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $6, $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i64 @cmov4_ne(i32 %a, i64 %b, i64 %c) nounwind readnone { +define i64 @cmov4_ne(i32 signext %a, i64 %b, i64 %c) nounwind readnone { entry: %cmp = 
icmp ne i32 %a, 234 %cond = select i1 %cmp, i64 %b, i64 %c @@ -263,7 +263,7 @@ entry: ; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i32 @slti0(i32 %a) { +define i32 @slti0(i32 signext %a) { entry: %cmp = icmp sgt i32 %a, 32766 %cond = select i1 %cmp, i32 3, i32 5 @@ -302,7 +302,7 @@ entry: ; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i32 @slti1(i32 %a) { +define i32 @slti1(i32 signext %a) { entry: %cmp = icmp sgt i32 %a, 32767 %cond = select i1 %cmp, i32 7, i32 5 @@ -337,7 +337,7 @@ entry: ; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i32 @slti2(i32 %a) { +define i32 @slti2(i32 signext %a) { entry: %cmp = icmp sgt i32 %a, -32769 %cond = select i1 %cmp, i32 3, i32 5 @@ -380,7 +380,7 @@ entry: ; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i32 @slti3(i32 %a) { +define i32 @slti3(i32 signext %a) { entry: %cmp = icmp sgt i32 %a, -32770 %cond = select i1 %cmp, i32 3, i32 5 @@ -567,7 +567,7 @@ entry: ; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i32 @sltiu0(i32 %a) { +define i32 @sltiu0(i32 signext %a) { entry: %cmp = icmp ugt i32 %a, 32766 %cond = select i1 %cmp, i32 3, i32 5 @@ -606,7 +606,7 @@ entry: ; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i32 @sltiu1(i32 %a) { +define i32 @sltiu1(i32 signext %a) { entry: %cmp = icmp ugt i32 %a, 32767 %cond = select i1 %cmp, i32 7, i32 5 @@ -641,7 +641,7 @@ entry: ; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] -define i32 @sltiu2(i32 %a) { +define i32 @sltiu2(i32 signext %a) { entry: %cmp = icmp ugt i32 %a, -32769 %cond = select i1 %cmp, i32 3, i32 5 @@ -684,7 +684,7 @@ entry: ; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] 
-define i32 @sltiu3(i32 %a) { +define i32 @sltiu3(i32 signext %a) { entry: %cmp = icmp ugt i32 %a, -32770 %cond = select i1 %cmp, i32 3, i32 5 @@ -697,7 +697,7 @@ entry: ; doesn't generate conditional moves ; for constant operands whose difference is |1| -define i32 @slti4(i32 %a) nounwind readnone { +define i32 @slti4(i32 signext %a) nounwind readnone { %1 = icmp slt i32 %a, 7 %2 = select i1 %1, i32 4, i32 3 ret i32 %2 @@ -723,7 +723,7 @@ define i32 @slti4(i32 %a) nounwind readnone { ; 64-CMP-NOT: seleqz ; 64-CMP-NOT: selnez -define i32 @slti5(i32 %a) nounwind readnone { +define i32 @slti5(i32 signext %a) nounwind readnone { %1 = icmp slt i32 %a, 7 %2 = select i1 %1, i32 -3, i32 -4 ret i32 %2 @@ -749,7 +749,7 @@ define i32 @slti5(i32 %a) nounwind readnone { ; 64-CMP-NOT: seleqz ; 64-CMP-NOT: selnez -define i32 @slti6(i32 %a) nounwind readnone { +define i32 @slti6(i32 signext %a) nounwind readnone { %1 = icmp slt i32 %a, 7 %2 = select i1 %1, i32 3, i32 4 ret i32 %2 diff --git a/test/CodeGen/Mips/const-mult.ll b/test/CodeGen/Mips/const-mult.ll index 186202141dcb..60b2a88196bd 100644 --- a/test/CodeGen/Mips/const-mult.ll +++ b/test/CodeGen/Mips/const-mult.ll @@ -5,7 +5,7 @@ ; CHECK: sll $[[R0:[0-9]+]], $4, 2 ; CHECK: addu ${{[0-9]+}}, $[[R0]], $4 -define i32 @mul5_32(i32 %a) { +define i32 @mul5_32(i32 signext %a) { entry: %mul = mul nsw i32 %a, 5 ret i32 %mul @@ -17,7 +17,7 @@ entry: ; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 5 ; CHECK: subu ${{[0-9]+}}, $[[R2]], $[[R1]] -define i32 @mul27_32(i32 %a) { +define i32 @mul27_32(i32 signext %a) { entry: %mul = mul nsw i32 %a, 27 ret i32 %mul @@ -29,7 +29,7 @@ entry: ; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 31 ; CHECK: addu ${{[0-9]+}}, $[[R2]], $[[R1]] -define i32 @muln2147483643_32(i32 %a) { +define i32 @muln2147483643_32(i32 signext %a) { entry: %mul = mul nsw i32 %a, -2147483643 ret i32 %mul @@ -41,7 +41,7 @@ entry: ; CHECK64-DAG: dsll $[[R2:[0-9]+]], $4, 63 ; CHECK64: daddu ${{[0-9]+}}, $[[R2]], $[[R1]] -define i64 
@muln9223372036854775805_64(i64 %a) { +define i64 @muln9223372036854775805_64(i64 signext %a) { entry: %mul = mul nsw i64 %a, -9223372036854775805 ret i64 %mul diff --git a/test/CodeGen/Mips/countleading.ll b/test/CodeGen/Mips/countleading.ll index 6e63cff123cf..b7aad049e8ab 100644 --- a/test/CodeGen/Mips/countleading.ll +++ b/test/CodeGen/Mips/countleading.ll @@ -11,7 +11,7 @@ ; MIPS32-GT-R1 - MIPS64r1 and above (does not include MIPS64's) ; MIPS64-GT-R1 - MIPS64r1 and above -define i32 @ctlz_i32(i32 %X) nounwind readnone { +define i32 @ctlz_i32(i32 signext %X) nounwind readnone { entry: ; ALL-LABEL: ctlz_i32: @@ -27,7 +27,7 @@ entry: declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone -define i32 @ctlo_i32(i32 %X) nounwind readnone { +define i32 @ctlo_i32(i32 signext %X) nounwind readnone { entry: ; ALL-LABEL: ctlo_i32: diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll index 97f836044406..a9cfe0fa1523 100644 --- a/test/CodeGen/Mips/divrem.ll +++ b/test/CodeGen/Mips/divrem.ll @@ -27,7 +27,7 @@ @g0 = common global i32 0, align 4 @g1 = common global i32 0, align 4 -define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone { +define i32 @sdiv1(i32 signext %a0, i32 signext %a1) nounwind readnone { entry: ; ALL-LABEL: sdiv1: @@ -54,7 +54,7 @@ entry: ret i32 %div } -define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone { +define i32 @srem1(i32 signext %a0, i32 signext %a1) nounwind readnone { entry: ; ALL-LABEL: srem1: @@ -81,7 +81,7 @@ entry: ret i32 %rem } -define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone { +define i32 @udiv1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone { entry: ; ALL-LABEL: udiv1: @@ -107,7 +107,7 @@ entry: ret i32 %div } -define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone { +define i32 @urem1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone { entry: ; ALL-LABEL: urem1: @@ -134,7 +134,7 @@ entry: ret i32 %rem } -define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind { +define i32 @sdivrem1(i32 signext 
%a0, i32 signext %a1, i32* nocapture %r) nounwind { entry: ; ALL-LABEL: sdivrem1: @@ -175,7 +175,7 @@ entry: ret i32 %div } -define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind { +define i32 @udivrem1(i32 zeroext %a0, i32 zeroext %a1, i32* nocapture %r) nounwind { entry: ; ALL-LABEL: udivrem1: diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll index e78497a9521e..b4efb40b6422 100644 --- a/test/CodeGen/Mips/ehframe-indirect.ll +++ b/test/CodeGen/Mips/ehframe-indirect.ll @@ -1,5 +1,7 @@ -; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck %s -; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck %s +; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck -check-prefix=CHECK32 %s +; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck -check-prefix=CHECK32 %s +; RUN: llc -mtriple=mips64el-linux-gnu < %s | FileCheck -check-prefix=CHECK64 %s +; RUN: llc -mtriple=mips64el-linux-android < %s | FileCheck -check-prefix=CHECK64 %s define i32 @main() { ; CHECK: .cfi_startproc @@ -27,8 +29,11 @@ declare void @foo() ; CHECK: .hidden DW.ref.__gxx_personality_v0 ; CHECK: .weak DW.ref.__gxx_personality_v0 ; CHECK: .section .data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat -; CHECK: .align 2 +; CHECK32: .align 2 +; CHECK64: .align 3 ; CHECK: .type DW.ref.__gxx_personality_v0,@object -; CHECK: .size DW.ref.__gxx_personality_v0, 4 +; CHECK32: .size DW.ref.__gxx_personality_v0, 4 +; CHECK64: .size DW.ref.__gxx_personality_v0, 8 ; CHECK: DW.ref.__gxx_personality_v0: -; CHECK: .4byte __gxx_personality_v0 +; CHECK32: .4byte __gxx_personality_v0 +; CHECK64: .8byte __gxx_personality_v0 diff --git a/test/CodeGen/Mips/fastcc.ll b/test/CodeGen/Mips/fastcc.ll index 822902c27d2f..6b022c5e36d9 100644 --- a/test/CodeGen/Mips/fastcc.ll +++ b/test/CodeGen/Mips/fastcc.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=mipsel-none-nacl-gnu \ ; RUN: | FileCheck %s -check-prefix=CHECK-NACL ; RUN: llc < %s 
-march=mipsel -mcpu=mips32 -mattr=+nooddspreg | FileCheck %s -check-prefix=NOODDSPREG +; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -mattr=+fp64,+nooddspreg | FileCheck %s -check-prefix=FP64-NOODDSPREG @gi0 = external global i32 @@ -82,6 +83,7 @@ @g16 = external global i32 @fa = common global [11 x float] zeroinitializer, align 4 +@da = common global [11 x double] zeroinitializer, align 8 define void @caller0() nounwind { entry: @@ -270,7 +272,7 @@ entry: define void @caller2() { entry: -; NOODDSPREG-LABEL: caller2 +; NOODDSPREG-LABEL: caller2: ; Check that first 10 arguments are passed in even float registers ; f0, f2, ... , f18. Check that 11th argument is passed on stack. @@ -312,7 +314,7 @@ define fastcc void @callee2(float %a0, float %a1, float %a2, float %a3, float %a8, float %a9, float %a10) { entry: -; NOODDSPREG-LABEL: callee2 +; NOODDSPREG-LABEL: callee2: ; NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]] @@ -348,3 +350,83 @@ entry: ret void } +define void @caller3() { +entry: + +; FP64-NOODDSPREG-LABEL: caller3: + +; Check that first 10 arguments are passed in even float registers +; f0, f2, ... , f18. Check that 11th argument is passed on stack. 
+ +; FP64-NOODDSPREG-DAG: lw $[[R0:[0-9]+]], %got(da)(${{[0-9]+|gp}}) +; FP64-NOODDSPREG-DAG: ldc1 $f0, 0($[[R0]]) +; FP64-NOODDSPREG-DAG: ldc1 $f2, 8($[[R0]]) +; FP64-NOODDSPREG-DAG: ldc1 $f4, 16($[[R0]]) +; FP64-NOODDSPREG-DAG: ldc1 $f6, 24($[[R0]]) +; FP64-NOODDSPREG-DAG: ldc1 $f8, 32($[[R0]]) +; FP64-NOODDSPREG-DAG: ldc1 $f10, 40($[[R0]]) +; FP64-NOODDSPREG-DAG: ldc1 $f12, 48($[[R0]]) +; FP64-NOODDSPREG-DAG: ldc1 $f14, 56($[[R0]]) +; FP64-NOODDSPREG-DAG: ldc1 $f16, 64($[[R0]]) +; FP64-NOODDSPREG-DAG: ldc1 $f18, 72($[[R0]]) + +; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], 80($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 0($sp) + + %0 = load double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8 + %1 = load double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8 + %2 = load double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8 + %3 = load double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8 + %4 = load double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8 + %5 = load double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8 + %6 = load double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8 + %7 = load double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8 + %8 = load double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8 + %9 = load double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8 + %10 = load double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8 + tail call fastcc void @callee3(double %0, double %1, double %2, double %3, + double %4, double %5, double %6, double %7, + double %8, double %9, double %10) + ret void +} + +define fastcc void @callee3(double %a0, double %a1, double %a2, double %a3, + double %a4, double %a5, double %a6, double %a7, + double %a8, double %a9, double %a10) { +entry: + +; FP64-NOODDSPREG-LABEL: callee3: + +; FP64-NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]] + +; Check that first 10 arguments are received 
in even float registers +; f0, f2, ... , f18. Check that 11th argument is received on stack. + +; FP64-NOODDSPREG-DAG: lw $[[R0:[0-9]+]], %got(da)(${{[0-9]+|gp}}) +; FP64-NOODDSPREG-DAG: sdc1 $f0, 0($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $f2, 8($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $f4, 16($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $f6, 24($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $f8, 32($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $f10, 40($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $f12, 48($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $f14, 56($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $f16, 64($[[R0]]) +; FP64-NOODDSPREG-DAG: sdc1 $f18, 72($[[R0]]) + +; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp) +; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 80($[[R0]]) + + store double %a0, double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8 + store double %a1, double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8 + store double %a2, double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8 + store double %a3, double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8 + store double %a4, double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8 + store double %a5, double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8 + store double %a6, double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8 + store double %a7, double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8 + store double %a8, double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8 + store double %a9, double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8 + store double %a10, double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8 + ret void +} diff --git a/test/CodeGen/Mips/fp64a.ll b/test/CodeGen/Mips/fp64a.ll index 5c2c87373a32..fadce5cb748b 100644 --- a/test/CodeGen/Mips/fp64a.ll +++ b/test/CodeGen/Mips/fp64a.ll @@ -12,9 +12,9 @@ ; this check here. 
; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-BE -; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-BE +; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A ; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-LE -; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-LE +; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A ; RUN: llc -march=mips64 -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A ; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A @@ -38,15 +38,10 @@ define double @call1(double %d, ...) 
{ ; 32R2-NO-FP64A-BE: mtc1 $5, $f0 ; 32R2-NO-FP64A-BE: mthc1 $4, $f0 -; 32R2-FP64A-LE: addiu $sp, $sp, -8 -; 32R2-FP64A-LE: sw $4, 0($sp) -; 32R2-FP64A-LE: sw $5, 4($sp) -; 32R2-FP64A-LE: ldc1 $f0, 0($sp) - -; 32R2-FP64A-BE: addiu $sp, $sp, -8 -; 32R2-FP64A-BE: sw $5, 0($sp) -; 32R2-FP64A-BE: sw $4, 4($sp) -; 32R2-FP64A-BE: ldc1 $f0, 0($sp) +; 32R2-FP64A: addiu $sp, $sp, -8 +; 32R2-FP64A: sw $4, 0($sp) +; 32R2-FP64A: sw $5, 4($sp) +; 32R2-FP64A: ldc1 $f0, 0($sp) ; 64-NO-FP64A: daddiu $sp, $sp, -64 ; 64-NO-FP64A: mov.d $f0, $f12 @@ -63,15 +58,10 @@ define double @call2(i32 %i, double %d) { ; 32R2-NO-FP64A-BE: mtc1 $7, $f0 ; 32R2-NO-FP64A-BE: mthc1 $6, $f0 -; 32R2-FP64A-LE: addiu $sp, $sp, -8 -; 32R2-FP64A-LE: sw $6, 0($sp) -; 32R2-FP64A-LE: sw $7, 4($sp) -; 32R2-FP64A-LE: ldc1 $f0, 0($sp) - -; 32R2-FP64A-BE: addiu $sp, $sp, -8 -; 32R2-FP64A-BE: sw $7, 0($sp) -; 32R2-FP64A-BE: sw $6, 4($sp) -; 32R2-FP64A-BE: ldc1 $f0, 0($sp) +; 32R2-FP64A: addiu $sp, $sp, -8 +; 32R2-FP64A: sw $6, 0($sp) +; 32R2-FP64A: sw $7, 4($sp) +; 32R2-FP64A: ldc1 $f0, 0($sp) ; 64-NO-FP64A-NOT: daddiu $sp, $sp ; 64-NO-FP64A: mov.d $f0, $f13 @@ -88,15 +78,10 @@ define double @call3(float %f1, float %f2, double %d) { ; 32R2-NO-FP64A-BE: mtc1 $7, $f0 ; 32R2-NO-FP64A-BE: mthc1 $6, $f0 -; 32R2-FP64A-LE: addiu $sp, $sp, -8 -; 32R2-FP64A-LE: sw $6, 0($sp) -; 32R2-FP64A-LE: sw $7, 4($sp) -; 32R2-FP64A-LE: ldc1 $f0, 0($sp) - -; 32R2-FP64A-BE: addiu $sp, $sp, -8 -; 32R2-FP64A-BE: sw $7, 0($sp) -; 32R2-FP64A-BE: sw $6, 4($sp) -; 32R2-FP64A-BE: ldc1 $f0, 0($sp) +; 32R2-FP64A: addiu $sp, $sp, -8 +; 32R2-FP64A: sw $6, 0($sp) +; 32R2-FP64A: sw $7, 4($sp) +; 32R2-FP64A: ldc1 $f0, 0($sp) ; 64-NO-FP64A-NOT: daddiu $sp, $sp ; 64-NO-FP64A: mov.d $f0, $f14 @@ -113,15 +98,10 @@ define double @call4(float %f, double %d, ...) 
{ ; 32R2-NO-FP64A-BE: mtc1 $7, $f0 ; 32R2-NO-FP64A-BE: mthc1 $6, $f0 -; 32R2-FP64A-LE: addiu $sp, $sp, -8 -; 32R2-FP64A-LE: sw $6, 0($sp) -; 32R2-FP64A-LE: sw $7, 4($sp) -; 32R2-FP64A-LE: ldc1 $f0, 0($sp) - -; 32R2-FP64A-BE: addiu $sp, $sp, -8 -; 32R2-FP64A-BE: sw $7, 0($sp) -; 32R2-FP64A-BE: sw $6, 4($sp) -; 32R2-FP64A-BE: ldc1 $f0, 0($sp) +; 32R2-FP64A: addiu $sp, $sp, -8 +; 32R2-FP64A: sw $6, 0($sp) +; 32R2-FP64A: sw $7, 4($sp) +; 32R2-FP64A: ldc1 $f0, 0($sp) ; 64-NO-FP64A: daddiu $sp, $sp, -48 ; 64-NO-FP64A: mov.d $f0, $f13 @@ -145,23 +125,14 @@ define double @call5(double %a, double %b, ...) { ; 32R2-NO-FP64A-BE-DAG: mthc1 $6, $[[T1:f[0-9]+]] ; 32R2-NO-FP64A-BE: sub.d $f0, $[[T0]], $[[T1]] -; 32R2-FP64A-LE: addiu $sp, $sp, -8 -; 32R2-FP64A-LE: sw $6, 0($sp) -; 32R2-FP64A-LE: sw $7, 4($sp) -; 32R2-FP64A-LE: ldc1 $[[T1:f[0-9]+]], 0($sp) -; 32R2-FP64A-LE: sw $4, 0($sp) -; 32R2-FP64A-LE: sw $5, 4($sp) -; 32R2-FP64A-LE: ldc1 $[[T0:f[0-9]+]], 0($sp) -; 32R2-FP64A-LE: sub.d $f0, $[[T0]], $[[T1]] - -; 32R2-FP64A-BE: addiu $sp, $sp, -8 -; 32R2-FP64A-BE: sw $7, 0($sp) -; 32R2-FP64A-BE: sw $6, 4($sp) -; 32R2-FP64A-BE: ldc1 $[[T1:f[0-9]+]], 0($sp) -; 32R2-FP64A-BE: sw $5, 0($sp) -; 32R2-FP64A-BE: sw $4, 4($sp) -; 32R2-FP64A-BE: ldc1 $[[T0:f[0-9]+]], 0($sp) -; 32R2-FP64A-BE: sub.d $f0, $[[T0]], $[[T1]] +; 32R2-FP64A: addiu $sp, $sp, -8 +; 32R2-FP64A: sw $6, 0($sp) +; 32R2-FP64A: sw $7, 4($sp) +; 32R2-FP64A: ldc1 $[[T1:f[0-9]+]], 0($sp) +; 32R2-FP64A: sw $4, 0($sp) +; 32R2-FP64A: sw $5, 4($sp) +; 32R2-FP64A: ldc1 $[[T0:f[0-9]+]], 0($sp) +; 32R2-FP64A: sub.d $f0, $[[T0]], $[[T1]] ; 64-NO-FP64A: sub.d $f0, $f12, $f13 } @@ -179,19 +150,12 @@ define double @move_from(double %d) { ; 32R2-NO-FP64A-BE-DAG: mfc1 $7, $f0 ; 32R2-NO-FP64A-BE-DAG: mfhc1 $6, $f0 -; 32R2-FP64A-LE: addiu $sp, $sp, -32 -; 32R2-FP64A-LE: sdc1 $f0, 16($sp) -; 32R2-FP64A-LE: lw $6, 16($sp) -; FIXME: This store is redundant -; 32R2-FP64A-LE: sdc1 $f0, 16($sp) -; 32R2-FP64A-LE: lw $7, 20($sp) - -; 
32R2-FP64A-BE: addiu $sp, $sp, -32 -; 32R2-FP64A-BE: sdc1 $f0, 16($sp) -; 32R2-FP64A-BE: lw $6, 20($sp) +; 32R2-FP64A: addiu $sp, $sp, -32 +; 32R2-FP64A: sdc1 $f0, 16($sp) +; 32R2-FP64A: lw $6, 16($sp) ; FIXME: This store is redundant -; 32R2-FP64A-BE: sdc1 $f0, 16($sp) -; 32R2-FP64A-BE: lw $7, 16($sp) +; 32R2-FP64A: sdc1 $f0, 16($sp) +; 32R2-FP64A: lw $7, 20($sp) ; 64-NO-FP64A: mov.d $f13, $f0 } diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll index 6512851a11be..3d9dec76fb37 100644 --- a/test/CodeGen/Mips/inlineasm-operand-code.ll +++ b/test/CodeGen/Mips/inlineasm-operand-code.ll @@ -65,6 +65,33 @@ entry: ;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},$0 ;CHECK_LITTLE_32: #NO_APP tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind + +; z with non-zero and the "r"(register) and "J"(integer zero) constraints +;CHECK_LITTLE_32: #APP +;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}} +;CHECK_LITTLE_32: #NO_APP + call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 7) nounwind + +; z with zero and the "r"(register) and "J"(integer zero) constraints +;CHECK_LITTLE_32: #APP +;CHECK_LITTLE_32: mtc0 $0, ${{[0-9]+}} +;CHECK_LITTLE_32: #NO_APP + call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 0) nounwind + +; z with non-zero and just the "r"(register) constraint +;CHECK_LITTLE_32: #APP +;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}} +;CHECK_LITTLE_32: #NO_APP + call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 7) nounwind + +; z with zero and just the "r"(register) constraint +; FIXME: Check for $0, instead of other registers. +; We should be using $0 directly in this case, not real registers. +; When the materialization of 0 gets fixed, this test will fail. 
+;CHECK_LITTLE_32: #APP +;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}} +;CHECK_LITTLE_32: #NO_APP + call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 0) nounwind ret i32 0 } diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll index a3f5ebfb5460..f6d0e8debb36 100644 --- a/test/CodeGen/Mips/load-store-left-right.ll +++ b/test/CodeGen/Mips/load-store-left-right.ll @@ -47,7 +47,7 @@ entry: ret i32 %0 } -define void @store_SI(i32 %a) nounwind { +define void @store_SI(i32 signext %a) nounwind { entry: ; ALL-LABEL: store_SI: @@ -201,7 +201,7 @@ entry: ret void } -define void @store_SI_trunc_from_i64(i32 %a) nounwind { +define void @store_SI_trunc_from_i64(i32 signext %a) nounwind { entry: ; ALL-LABEL: store_SI_trunc_from_i64: diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll index a403744c8fd5..b9b52be01dad 100644 --- a/test/CodeGen/Mips/longbranch.ll +++ b/test/CodeGen/Mips/longbranch.ll @@ -13,7 +13,7 @@ @x = external global i32 -define void @test1(i32 %s) { +define void @test1(i32 signext %s) { entry: %cmp = icmp eq i32 %s, 0 br i1 %cmp, label %end, label %then diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll index 82229677ff11..b0c3ff6ff9b5 100644 --- a/test/CodeGen/Mips/madd-msub.ll +++ b/test/CodeGen/Mips/madd-msub.ll @@ -76,26 +76,14 @@ entry: ; 32R6-DAG: muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}} ; 32R6-DAG: addu $2, $[[T3]], $[[T2]] -; 64-DAG: dsll $[[T0:[0-9]+]], $4, 32 -; 64-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32 -; 64-DAG: dsll $[[T2:[0-9]+]], $5, 32 -; 64-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32 -; 64-DAG: d[[m:m]]ult $[[T3]], $[[T1]] -; 64-DAG: [[m]]flo $[[T4:[0-9]+]] -; 64-DAG: dsll $[[T5:[0-9]+]], $6, 32 -; 64-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32 -; 64-DAG: daddu $2, $[[T4]], $[[T6]] - -; 64R6-DAG: dsll $[[T0:[0-9]+]], $4, 32 -; 64R6-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32 -; 64R6-DAG: dsll $[[T2:[0-9]+]], $5, 32 -; 64R6-DAG: dsrl $[[T3:[0-9]+]], 
$[[T2]], 32 -; 64R6-DAG: dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]] -; 64R6-DAG: dsll $[[T5:[0-9]+]], $6, 32 -; 64R6-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32 -; 64R6-DAG: daddu $2, $[[T4]], $[[T6]] - -define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone { +; 64-DAG: d[[m:m]]ult $5, $4 +; 64-DAG: [[m]]flo $[[T0:[0-9]+]] +; 64-DAG: daddu $2, $[[T0]], $6 + +; 64R6-DAG: dmul $[[T0:[0-9]+]], $5, $4 +; 64R6-DAG: daddu $2, $[[T0]], $6 + +define i64 @madd2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone { entry: %conv = zext i32 %a to i64 %conv2 = zext i32 %b to i64 @@ -214,26 +202,14 @@ entry: ; 32R6-DAG: negu $2, $[[T3]] ; 32R6-DAG: subu $3, $6, $[[T1]] -; 64-DAG: dsll $[[T0:[0-9]+]], $4, 32 -; 64-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32 -; 64-DAG: dsll $[[T2:[0-9]+]], $5, 32 -; 64-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32 -; 64-DAG: d[[m:m]]ult $[[T3]], $[[T1]] -; 64-DAG: [[m]]flo $[[T4:[0-9]+]] -; 64-DAG: dsll $[[T5:[0-9]+]], $6, 32 -; 64-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32 -; 64-DAG: dsubu $2, $[[T6]], $[[T4]] - -; 64R6-DAG: dsll $[[T0:[0-9]+]], $4, 32 -; 64R6-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32 -; 64R6-DAG: dsll $[[T2:[0-9]+]], $5, 32 -; 64R6-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32 -; 64R6-DAG: dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]] -; 64R6-DAG: dsll $[[T5:[0-9]+]], $6, 32 -; 64R6-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32 -; 64R6-DAG: dsubu $2, $[[T6]], $[[T4]] - -define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone { +; 64-DAG: d[[m:m]]ult $5, $4 +; 64-DAG: [[m]]flo $[[T0:[0-9]+]] +; 64-DAG: dsubu $2, $6, $[[T0]] + +; 64R6-DAG: dmul $[[T0:[0-9]+]], $5, $4 +; 64R6-DAG: dsubu $2, $6, $[[T0]] + +define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone { entry: %conv = zext i32 %c to i64 %conv2 = zext i32 %a to i64 diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll index 7f7d515d690e..f0cbbd08d79a 100644 --- a/test/CodeGen/Mips/mips64-f128.ll +++ b/test/CodeGen/Mips/mips64-f128.ll @@ -114,7 +114,7 @@ 
entry: ; ALL-LABEL: conv_LD_UInt: ; ALL: ld $25, %call16(__floatunsitf) -define fp128 @conv_LD_UInt(i32 %a) { +define fp128 @conv_LD_UInt(i32 signext %a) { entry: %conv = uitofp i32 %a to fp128 ret fp128 %conv @@ -635,7 +635,7 @@ entry: ; CMP_CC_FMT-DAG: selnez $[[NE2:[0-9]+]], $7, $[[CC]] ; CMP_CC_FMT-DAG: or $4, $[[NE2]], $[[EQ2]] -define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) { +define fp128 @select_LD(i32 signext %a, i64, fp128 %b, fp128 %c) { entry: %tobool = icmp ne i32 %a, 0 %cond = select i1 %tobool, fp128 %b, fp128 %c diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll index 7a52c3d41d69..ed494e965b7d 100644 --- a/test/CodeGen/Mips/mips64-sret.ll +++ b/test/CodeGen/Mips/mips64-sret.ll @@ -11,7 +11,7 @@ entry: ret void } -define void @bar(i32 %v, i32* noalias sret %agg.result) nounwind { +define void @bar(i32 signext %v, i32* noalias sret %agg.result) nounwind { entry: ; CHECK-LABEL: bar: ; CHECK: sw $4, 0($5) diff --git a/test/CodeGen/Mips/msa/frameindex.ll b/test/CodeGen/Mips/msa/frameindex.ll index 07e67bf04287..ebec465a3e33 100644 --- a/test/CodeGen/Mips/msa/frameindex.ll +++ b/test/CodeGen/Mips/msa/frameindex.ll @@ -36,10 +36,10 @@ define void @loadstore_v16i8_just_over_simm10() nounwind { %2 = alloca [497 x i8] ; Push the frame just over 512 bytes %3 = load volatile <16 x i8>* %1 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512 ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <16 x i8> %3, <16 x i8>* %1 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512 ; MIPS32-AE: st.b [[R1]], 0([[BASE]]) ret void @@ -53,12 +53,12 @@ define void @loadstore_v16i8_just_under_simm16() nounwind { %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes %3 = load volatile <16 x i8>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori 
[[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <16 x i8> %3, <16 x i8>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: st.b [[R1]], 0([[BASE]]) ret void @@ -72,12 +72,12 @@ define void @loadstore_v16i8_just_over_simm16() nounwind { %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes %3 = load volatile <16 x i8>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <16 x i8> %3, <16 x i8>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: st.b [[R1]], 0([[BASE]]) ret void @@ -107,10 +107,10 @@ define void @loadstore_v8i16_unaligned() nounwind { %5 = getelementptr [2 x <8 x i16>]* %4, i32 0, i32 0 %6 = load volatile <8 x i16>* %5 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1 ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <8 x i16> %6, <8 x i16>* %5 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1 ; MIPS32-AE: st.h [[R1]], 0([[BASE]]) ret void @@ -139,10 +139,10 @@ define void @loadstore_v8i16_just_over_simm10() nounwind { %2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes %3 = load volatile <8 x i16>* %1 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024 ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]]) 
store volatile <8 x i16> %3, <8 x i16>* %1 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024 ; MIPS32-AE: st.h [[R1]], 0([[BASE]]) ret void @@ -156,12 +156,12 @@ define void @loadstore_v8i16_just_under_simm16() nounwind { %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes %3 = load volatile <8 x i16>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <8 x i16> %3, <8 x i16>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: st.h [[R1]], 0([[BASE]]) ret void @@ -175,12 +175,12 @@ define void @loadstore_v8i16_just_over_simm16() nounwind { %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes %3 = load volatile <8 x i16>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <8 x i16> %3, <8 x i16>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: st.h [[R1]], 0([[BASE]]) ret void @@ -210,10 +210,10 @@ define void @loadstore_v4i32_unaligned() nounwind { %5 = getelementptr [2 x <4 x i32>]* %4, i32 0, i32 0 %6 = load volatile <4 x i32>* %5 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1 ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <4 x 
i32> %6, <4 x i32>* %5 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1 ; MIPS32-AE: st.w [[R1]], 0([[BASE]]) ret void @@ -242,10 +242,10 @@ define void @loadstore_v4i32_just_over_simm10() nounwind { %2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes %3 = load volatile <4 x i32>* %1 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048 ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <4 x i32> %3, <4 x i32>* %1 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048 ; MIPS32-AE: st.w [[R1]], 0([[BASE]]) ret void @@ -259,12 +259,12 @@ define void @loadstore_v4i32_just_under_simm16() nounwind { %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes %3 = load volatile <4 x i32>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <4 x i32> %3, <4 x i32>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: st.w [[R1]], 0([[BASE]]) ret void @@ -278,12 +278,12 @@ define void @loadstore_v4i32_just_over_simm16() nounwind { %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes %3 = load volatile <4 x i32>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <4 x i32> %3, <4 x i32>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], 
$sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: st.w [[R1]], 0([[BASE]]) ret void @@ -313,10 +313,10 @@ define void @loadstore_v2i64_unaligned() nounwind { %5 = getelementptr [2 x <2 x i64>]* %4, i32 0, i32 0 %6 = load volatile <2 x i64>* %5 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1 ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <2 x i64> %6, <2 x i64>* %5 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1 ; MIPS32-AE: st.d [[R1]], 0([[BASE]]) ret void @@ -345,10 +345,10 @@ define void @loadstore_v2i64_just_over_simm10() nounwind { %2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes %3 = load volatile <2 x i64>* %1 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096 ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <2 x i64> %3, <2 x i64>* %1 - ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096 + ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096 ; MIPS32-AE: st.d [[R1]], 0([[BASE]]) ret void @@ -362,12 +362,12 @@ define void @loadstore_v2i64_just_under_simm16() nounwind { %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes %3 = load volatile <2 x i64>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <2 x i64> %3, <2 x i64>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: st.d [[R1]], 0([[BASE]]) ret void @@ -381,12 +381,12 @@ define void @loadstore_v2i64_just_over_simm16() 
nounwind { %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes %3 = load volatile <2 x i64>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]]) store volatile <2 x i64> %3, <2 x i64>* %1 - ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 - ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]] ; MIPS32-AE: st.d [[R1]], 0([[BASE]]) ret void diff --git a/test/CodeGen/Mips/octeon_popcnt.ll b/test/CodeGen/Mips/octeon_popcnt.ll index 52c37f69d020..3432b3992984 100644 --- a/test/CodeGen/Mips/octeon_popcnt.ll +++ b/test/CodeGen/Mips/octeon_popcnt.ll @@ -6,7 +6,7 @@ define i8 @cnt8(i8 %x) nounwind readnone { ret i8 %cnt ; OCTEON-LABEL: cnt8: ; OCTEON: jr $ra -; OCTEON: pop $2, $1 +; OCTEON: pop $2, [[R1:\$[0-9]+]] ; MIPS64-LABEL: cnt8: ; MIPS64-NOT: pop } @@ -16,12 +16,12 @@ define i16 @cnt16(i16 %x) nounwind readnone { ret i16 %cnt ; OCTEON-LABEL: cnt16: ; OCTEON: jr $ra -; OCTEON: pop $2, $1 +; OCTEON: pop $2, [[R1:\$[0-9]+]] ; MIPS64-LABEL: cnt16: ; MIPS64-NOT: pop } -define i32 @cnt32(i32 %x) nounwind readnone { +define i32 @cnt32(i32 zeroext %x) nounwind readnone { %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt ; OCTEON-LABEL: cnt32: diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll index eb2198b36dff..d6e1826c30c8 100644 --- a/test/CodeGen/Mips/select.ll +++ b/test/CodeGen/Mips/select.ll @@ -8,7 +8,7 @@ @d2 = external global double @d3 = external global double -define i32 @i32_icmp_ne_i32_val(i32 %s, i32 %f0, i32 %f1) nounwind readnone { +define i32 @i32_icmp_ne_i32_val(i32 signext %s, i32 signext %f0, i32 signext %f1) nounwind readnone { entry: ; ALL-LABEL: i32_icmp_ne_i32_val: @@ -37,7 +37,7 @@ entry: ret i32 %cond 
} -define i64 @i32_icmp_ne_i64_val(i32 %s, i64 %f0, i64 %f1) nounwind readnone { +define i64 @i32_icmp_ne_i64_val(i32 signext %s, i64 %f0, i64 %f1) nounwind readnone { entry: ; ALL-LABEL: i32_icmp_ne_i64_val: @@ -128,7 +128,7 @@ entry: ret i64 %cond } -define float @i32_icmp_ne_f32_val(i32 %s, float %f0, float %f1) nounwind readnone { +define float @i32_icmp_ne_f32_val(i32 signext %s, float %f0, float %f1) nounwind readnone { entry: ; ALL-LABEL: i32_icmp_ne_f32_val: @@ -161,7 +161,7 @@ entry: ret float %cond } -define double @i32_icmp_ne_f64_val(i32 %s, double %f0, double %f1) nounwind readnone { +define double @i32_icmp_ne_f64_val(i32 signext %s, double %f0, double %f1) nounwind readnone { entry: ; ALL-LABEL: i32_icmp_ne_f64_val: @@ -496,7 +496,7 @@ entry: ret float %cond } -define i32 @f32_fcmp_oeq_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +define i32 @f32_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone { entry: ; ALL-LABEL: f32_fcmp_oeq_i32_val: @@ -541,7 +541,7 @@ entry: ret i32 %cond } -define i32 @f32_fcmp_olt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +define i32 @f32_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone { entry: ; ALL-LABEL: f32_fcmp_olt_i32_val: @@ -585,7 +585,7 @@ entry: ret i32 %cond } -define i32 @f32_fcmp_ogt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +define i32 @f32_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone { entry: ; ALL-LABEL: f32_fcmp_ogt_i32_val: @@ -630,7 +630,7 @@ entry: ret i32 %cond } -define i32 @f64_fcmp_oeq_i32_val(i32 %f0, i32 %f1) nounwind readonly { +define i32 @f64_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly { entry: ; ALL-LABEL: f64_fcmp_oeq_i32_val: @@ -707,7 +707,7 @@ entry: ret i32 %cond } -define i32 @f64_fcmp_olt_i32_val(i32 %f0, i32 %f1) nounwind readonly { +define i32 
@f64_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly { entry: ; ALL-LABEL: f64_fcmp_olt_i32_val: @@ -784,7 +784,7 @@ entry: ret i32 %cond } -define i32 @f64_fcmp_ogt_i32_val(i32 %f0, i32 %f1) nounwind readonly { +define i32 @f64_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly { entry: ; ALL-LABEL: f64_fcmp_ogt_i32_val: diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll index a1b6cb0322b1..c766d3b3cc2a 100644 --- a/test/CodeGen/Mips/zeroreg.ll +++ b/test/CodeGen/Mips/zeroreg.ll @@ -8,7 +8,7 @@ @g1 = external global i32 -define i32 @sel_icmp_nez_i32_z0(i32 %s) nounwind readonly { +define i32 @sel_icmp_nez_i32_z0(i32 signext %s) nounwind readonly { entry: ; ALL-LABEL: sel_icmp_nez_i32_z0: @@ -30,7 +30,7 @@ entry: ret i32 %cond } -define i32 @sel_icmp_nez_i32_z1(i32 %s) nounwind readonly { +define i32 @sel_icmp_nez_i32_z1(i32 signext %s) nounwind readonly { entry: ; ALL-LABEL: sel_icmp_nez_i32_z1: diff --git a/test/CodeGen/PowerPC/blockaddress.ll b/test/CodeGen/PowerPC/blockaddress.ll new file mode 100644 index 000000000000..c1981e21fff4 --- /dev/null +++ b/test/CodeGen/PowerPC/blockaddress.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -code-model=small -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=SMALL +; RUN: llc < %s -code-model=medium -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM +; RUN: llc < %s -code-model=large -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM +; RUN: llc < %s -code-model=small -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=SMALL +; RUN: llc < %s -code-model=medium -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM +; RUN: llc < %s -code-model=large -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM + +define i8* @test() { +entry: + br label %here + +here: ; preds = %entry +; 
MEDIUM: .Ltmp[[TMP0:[0-9]+]]: +; MEDIUM: addis [[R0:[0-9]+]], 2, .LC[[LC0:[0-9]+]]@toc@ha +; MEDIUM: ld 3, .LC[[LC0]]@toc@l([[R0]]) +; MEDIUM: blr +; MEDIUM: .LC[[LC0]]: +; MEDIUM: .tc .Ltmp[[TMP0]][TC],.Ltmp[[TMP0]] +; SMALL: .Ltmp[[TMP0:[0-9]+]]: +; SMALL: ld 3, .LC[[LC0:[0-9]+]]@toc(2) +; SMALL: blr +; SMALL: .LC[[LC0]]: +; SMALL: .tc .Ltmp[[TMP0]][TC],.Ltmp[[TMP0]] + ret i8* blockaddress(@test, %here) +} + diff --git a/test/CodeGen/PowerPC/cc.ll b/test/CodeGen/PowerPC/cc.ll index f92121bd7202..c23ee7c9f5c8 100644 --- a/test/CodeGen/PowerPC/cc.ll +++ b/test/CodeGen/PowerPC/cc.ll @@ -41,7 +41,7 @@ entry: br label %foo foo: - call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc}" (i64 %a) + call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc},~{cr1},~{cr2},~{cr3},~{cr4},~{cr5},~{cr6},~{cr7}" (i64 %a) br i1 %c, label %bar, label %end bar: diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll index 5e00675c0398..71611060ed7a 100644 --- a/test/CodeGen/PowerPC/fast-isel-conversion.ll +++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s --check-prefix=ELF64LE ; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970 ;; Tests for 970 don't use -fast-isel-abort because we intentionally punt @@ -9,12 +10,16 @@ define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp { entry: ; ELF64: sitofp_single_i64 +; ELF64LE: sitofp_single_i64 ; PPC970: sitofp_single_i64 %b.addr = alloca float, align 4 %conv = sitofp i64 %a to float ; ELF64: std ; ELF64: lfd ; ELF64: fcfids +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfids ; PPC970: std ; PPC970: lfd ; PPC970: fcfid 
@@ -26,12 +31,20 @@ entry: define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp { entry: ; ELF64: sitofp_single_i32 +; ELF64LE: sitofp_single_i32 ; PPC970: sitofp_single_i32 %b.addr = alloca float, align 4 %conv = sitofp i32 %a to float ; ELF64: std +; stack offset used to load the float: 65524 = -16 + 4 +; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524 ; ELF64: lfiwax ; ELF64: fcfids +; ELF64LE: std +; stack offset used to load the float: 65520 = -16 + 0 +; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520 +; ELF64LE: lfiwax +; ELF64LE: fcfids ; PPC970: std ; PPC970: lfd ; PPC970: fcfid @@ -43,6 +56,7 @@ entry: define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp { entry: ; ELF64: sitofp_single_i16 +; ELF64LE: sitofp_single_i16 ; PPC970: sitofp_single_i16 %b.addr = alloca float, align 4 %conv = sitofp i16 %a to float @@ -50,6 +64,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfids +; ELF64LE: extsh +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfids ; PPC970: extsh ; PPC970: std ; PPC970: lfd @@ -62,6 +80,7 @@ entry: define void @sitofp_single_i8(i8 %a) nounwind ssp { entry: ; ELF64: sitofp_single_i8 +; ELF64LE: sitofp_single_i8 ; PPC970: sitofp_single_i8 %b.addr = alloca float, align 4 %conv = sitofp i8 %a to float @@ -69,6 +88,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfids +; ELF64LE: extsb +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfids ; PPC970: extsb ; PPC970: std ; PPC970: lfd @@ -81,12 +104,20 @@ entry: define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i32 +; ELF64LE: sitofp_double_i32 ; PPC970: sitofp_double_i32 %b.addr = alloca double, align 8 %conv = sitofp i32 %a to double ; ELF64: std +; stack offset used to load the float: 65524 = -16 + 4 +; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524 ; ELF64: lfiwax ; ELF64: fcfid +; ELF64LE: std +; stack offset used to load the float: 65520 = -16 + 0 +; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520 +; ELF64LE: lfiwax +; ELF64LE: fcfid ; PPC970: std ; PPC970: 
lfd ; PPC970: fcfid @@ -97,12 +128,16 @@ entry: define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i64 +; ELF64LE: sitofp_double_i64 ; PPC970: sitofp_double_i64 %b.addr = alloca double, align 8 %conv = sitofp i64 %a to double ; ELF64: std ; ELF64: lfd ; ELF64: fcfid +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfid ; PPC970: std ; PPC970: lfd ; PPC970: fcfid @@ -113,6 +148,7 @@ entry: define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i16 +; ELF64LE: sitofp_double_i16 ; PPC970: sitofp_double_i16 %b.addr = alloca double, align 8 %conv = sitofp i16 %a to double @@ -120,6 +156,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfid +; ELF64LE: extsh +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfid ; PPC970: extsh ; PPC970: std ; PPC970: lfd @@ -131,6 +171,7 @@ entry: define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i8 +; ELF64LE: sitofp_double_i8 ; PPC970: sitofp_double_i8 %b.addr = alloca double, align 8 %conv = sitofp i8 %a to double @@ -138,6 +179,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfid +; ELF64LE: extsb +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfid ; PPC970: extsb ; PPC970: std ; PPC970: lfd @@ -151,12 +196,16 @@ entry: define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp { entry: ; ELF64: uitofp_single_i64 +; ELF64LE: uitofp_single_i64 ; PPC970: uitofp_single_i64 %b.addr = alloca float, align 4 %conv = uitofp i64 %a to float ; ELF64: std ; ELF64: lfd ; ELF64: fcfidus +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidus ; PPC970-NOT: fcfidus store float %conv, float* %b.addr, align 4 ret void @@ -165,12 +214,20 @@ entry: define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp { entry: ; ELF64: uitofp_single_i32 +; ELF64LE: uitofp_single_i32 ; PPC970: uitofp_single_i32 %b.addr = alloca float, align 4 %conv = uitofp i32 %a to float ; ELF64: std +; stack offset used to load the float: 65524 = -16 + 4 +; 
ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524 ; ELF64: lfiwzx ; ELF64: fcfidus +; ELF64LE: std +; stack offset used to load the float: 65520 = -16 + 0 +; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520 +; ELF64LE: lfiwzx +; ELF64LE: fcfidus ; PPC970-NOT: lfiwzx ; PPC970-NOT: fcfidus store float %conv, float* %b.addr, align 4 @@ -180,6 +237,7 @@ entry: define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp { entry: ; ELF64: uitofp_single_i16 +; ELF64LE: uitofp_single_i16 ; PPC970: uitofp_single_i16 %b.addr = alloca float, align 4 %conv = uitofp i16 %a to float @@ -187,6 +245,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidus +; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidus ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31 ; PPC970: std ; PPC970: lfd @@ -199,6 +261,7 @@ entry: define void @uitofp_single_i8(i8 %a) nounwind ssp { entry: ; ELF64: uitofp_single_i8 +; ELF64LE: uitofp_single_i8 ; PPC970: uitofp_single_i8 %b.addr = alloca float, align 4 %conv = uitofp i8 %a to float @@ -206,6 +269,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidus +; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidus ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31 ; PPC970: std ; PPC970: lfd @@ -218,12 +285,16 @@ entry: define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i64 +; ELF64LE: uitofp_double_i64 ; PPC970: uitofp_double_i64 %b.addr = alloca double, align 8 %conv = uitofp i64 %a to double ; ELF64: std ; ELF64: lfd ; ELF64: fcfidu +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidu ; PPC970-NOT: fcfidu store double %conv, double* %b.addr, align 8 ret void @@ -232,12 +303,20 @@ entry: define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i32 +; ELF64LE: uitofp_double_i32 ; PPC970: uitofp_double_i32 %b.addr = alloca double, align 8 %conv = uitofp i32 %a to double ; ELF64: std +; stack offset used 
to load the float: 65524 = -16 + 4 +; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524 ; ELF64: lfiwzx ; ELF64: fcfidu +; ELF64LE: std +; stack offset used to load the float: 65520 = -16 + 0 +; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520 +; ELF64LE: lfiwzx +; ELF64LE: fcfidu ; PPC970-NOT: lfiwzx ; PPC970-NOT: fcfidu store double %conv, double* %b.addr, align 8 @@ -247,6 +326,7 @@ entry: define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i16 +; ELF64LE: uitofp_double_i16 ; PPC970: uitofp_double_i16 %b.addr = alloca double, align 8 %conv = uitofp i16 %a to double @@ -254,6 +334,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidu +; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidu ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31 ; PPC970: std ; PPC970: lfd @@ -265,6 +349,7 @@ entry: define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i8 +; ELF64LE: uitofp_double_i8 ; PPC970: uitofp_double_i8 %b.addr = alloca double, align 8 %conv = uitofp i8 %a to double @@ -272,6 +357,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidu +; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidu ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31 ; PPC970: std ; PPC970: lfd @@ -285,12 +374,16 @@ entry: define void @fptosi_float_i32(float %a) nounwind ssp { entry: ; ELF64: fptosi_float_i32 +; ELF64LE: fptosi_float_i32 ; PPC970: fptosi_float_i32 %b.addr = alloca i32, align 4 %conv = fptosi float %a to i32 ; ELF64: fctiwz ; ELF64: stfd ; ELF64: lwa +; ELF64LE: fctiwz +; ELF64LE: stfd +; ELF64LE: lwa ; PPC970: fctiwz ; PPC970: stfd ; PPC970: lwa @@ -301,12 +394,16 @@ entry: define void @fptosi_float_i64(float %a) nounwind ssp { entry: ; ELF64: fptosi_float_i64 +; ELF64LE: fptosi_float_i64 ; PPC970: fptosi_float_i64 %b.addr = alloca i64, align 4 %conv = fptosi float %a to i64 ; ELF64: fctidz ; ELF64: stfd ; ELF64: ld +; 
ELF64LE: fctidz +; ELF64LE: stfd +; ELF64LE: ld ; PPC970: fctidz ; PPC970: stfd ; PPC970: ld @@ -317,12 +414,16 @@ entry: define void @fptosi_double_i32(double %a) nounwind ssp { entry: ; ELF64: fptosi_double_i32 +; ELF64LE: fptosi_double_i32 ; PPC970: fptosi_double_i32 %b.addr = alloca i32, align 8 %conv = fptosi double %a to i32 ; ELF64: fctiwz ; ELF64: stfd ; ELF64: lwa +; ELF64LE: fctiwz +; ELF64LE: stfd +; ELF64LE: lwa ; PPC970: fctiwz ; PPC970: stfd ; PPC970: lwa @@ -333,12 +434,16 @@ entry: define void @fptosi_double_i64(double %a) nounwind ssp { entry: ; ELF64: fptosi_double_i64 +; ELF64LE: fptosi_double_i64 ; PPC970: fptosi_double_i64 %b.addr = alloca i64, align 8 %conv = fptosi double %a to i64 ; ELF64: fctidz ; ELF64: stfd ; ELF64: ld +; ELF64LE: fctidz +; ELF64LE: stfd +; ELF64LE: ld ; PPC970: fctidz ; PPC970: stfd ; PPC970: ld @@ -351,12 +456,16 @@ entry: define void @fptoui_float_i32(float %a) nounwind ssp { entry: ; ELF64: fptoui_float_i32 +; ELF64LE: fptoui_float_i32 ; PPC970: fptoui_float_i32 %b.addr = alloca i32, align 4 %conv = fptoui float %a to i32 ; ELF64: fctiwuz ; ELF64: stfd ; ELF64: lwz +; ELF64LE: fctiwuz +; ELF64LE: stfd +; ELF64LE: lwz ; PPC970: fctidz ; PPC970: stfd ; PPC970: lwz @@ -367,12 +476,16 @@ entry: define void @fptoui_float_i64(float %a) nounwind ssp { entry: ; ELF64: fptoui_float_i64 +; ELF64LE: fptoui_float_i64 ; PPC970: fptoui_float_i64 %b.addr = alloca i64, align 4 %conv = fptoui float %a to i64 ; ELF64: fctiduz ; ELF64: stfd ; ELF64: ld +; ELF64LE: fctiduz +; ELF64LE: stfd +; ELF64LE: ld ; PPC970-NOT: fctiduz store i64 %conv, i64* %b.addr, align 4 ret void @@ -381,12 +494,16 @@ entry: define void @fptoui_double_i32(double %a) nounwind ssp { entry: ; ELF64: fptoui_double_i32 +; ELF64LE: fptoui_double_i32 ; PPC970: fptoui_double_i32 %b.addr = alloca i32, align 8 %conv = fptoui double %a to i32 ; ELF64: fctiwuz ; ELF64: stfd ; ELF64: lwz +; ELF64LE: fctiwuz +; ELF64LE: stfd +; ELF64LE: lwz ; PPC970: fctidz ; PPC970: stfd ; 
PPC970: lwz @@ -397,12 +514,16 @@ entry: define void @fptoui_double_i64(double %a) nounwind ssp { entry: ; ELF64: fptoui_double_i64 +; ELF64LE: fptoui_double_i64 ; PPC970: fptoui_double_i64 %b.addr = alloca i64, align 8 %conv = fptoui double %a to i64 ; ELF64: fctiduz ; ELF64: stfd ; ELF64: ld +; ELF64LE: fctiduz +; ELF64LE: stfd +; ELF64LE: ld ; PPC970-NOT: fctiduz store i64 %conv, i64* %b.addr, align 8 ret void diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll index fa19f8b11fd6..f82de70c9286 100644 --- a/test/CodeGen/PowerPC/fast-isel-ret.ll +++ b/test/CodeGen/PowerPC/fast-isel-ret.ll @@ -1,8 +1,40 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 +define zeroext i1 @rettrue() nounwind uwtable ssp { +entry: +; ELF64-LABEL: rettrue +; ELF64: li 3, 1 +; ELF64: blr + ret i1 true +} + +define zeroext i1 @retfalse() nounwind uwtable ssp { +entry: +; ELF64-LABEL: retfalse +; ELF64: li 3, 0 +; ELF64: blr + ret i1 false +} + +define signext i1 @retstrue() nounwind uwtable ssp { +entry: +; ELF64-LABEL: retstrue +; ELF64: li 3, -1 +; ELF64: blr + ret i1 true +} + +define signext i1 @retsfalse() nounwind uwtable ssp { +entry: +; ELF64-LABEL: retsfalse +; ELF64: li 3, 0 +; ELF64: blr + ret i1 false +} + define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp { entry: -; ELF64: ret2 +; ELF64-LABEL: ret2 ; ELF64: extsb ; ELF64: blr ret i8 %a @@ -10,7 +42,7 @@ entry: define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp { entry: -; ELF64: ret3 +; ELF64-LABEL: ret3 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 ; ELF64: blr ret i8 %a @@ -18,7 +50,7 @@ entry: define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp { entry: -; ELF64: ret4 +; ELF64-LABEL: ret4 ; ELF64: extsh ; ELF64: blr ret i16 %a @@ -26,7 +58,7 @@ entry: define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp { entry: -; ELF64: ret5 +; ELF64-LABEL: ret5 ; 
ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 ; ELF64: blr ret i16 %a @@ -34,7 +66,7 @@ entry: define i16 @ret6(i16 %a) nounwind uwtable ssp { entry: -; ELF64: ret6 +; ELF64-LABEL: ret6 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 ; ELF64: blr ret i16 %a @@ -42,7 +74,7 @@ entry: define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp { entry: -; ELF64: ret7 +; ELF64-LABEL: ret7 ; ELF64: extsw ; ELF64: blr ret i32 %a @@ -50,7 +82,7 @@ entry: define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp { entry: -; ELF64: ret8 +; ELF64-LABEL: ret8 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32 ; ELF64: blr ret i32 %a @@ -58,7 +90,7 @@ entry: define i32 @ret9(i32 %a) nounwind uwtable ssp { entry: -; ELF64: ret9 +; ELF64-LABEL: ret9 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32 ; ELF64: blr ret i32 %a @@ -66,7 +98,7 @@ entry: define i64 @ret10(i64 %a) nounwind uwtable ssp { entry: -; ELF64: ret10 +; ELF64-LABEL: ret10 ; ELF64-NOT: exts ; ELF64-NOT: rldicl ; ELF64: blr @@ -75,21 +107,21 @@ entry: define float @ret11(float %a) nounwind uwtable ssp { entry: -; ELF64: ret11 +; ELF64-LABEL: ret11 ; ELF64: blr ret float %a } define double @ret12(double %a) nounwind uwtable ssp { entry: -; ELF64: ret12 +; ELF64-LABEL: ret12 ; ELF64: blr ret double %a } define i8 @ret13() nounwind uwtable ssp { entry: -; ELF64: ret13 +; ELF64-LABEL: ret13 ; ELF64: li ; ELF64: blr ret i8 15; @@ -97,7 +129,7 @@ entry: define i16 @ret14() nounwind uwtable ssp { entry: -; ELF64: ret14 +; ELF64-LABEL: ret14 ; ELF64: li ; ELF64: blr ret i16 -225; @@ -105,7 +137,7 @@ entry: define i32 @ret15() nounwind uwtable ssp { entry: -; ELF64: ret15 +; ELF64-LABEL: ret15 ; ELF64: lis ; ELF64: ori ; ELF64: blr @@ -114,7 +146,7 @@ entry: define i64 @ret16() nounwind uwtable ssp { entry: -; ELF64: ret16 +; ELF64-LABEL: ret16 ; ELF64: li ; ELF64: sldi ; ELF64: oris @@ -125,7 +157,7 @@ entry: define float @ret17() nounwind uwtable ssp { entry: -; ELF64: ret17 +; ELF64-LABEL: ret17 ; ELF64: addis ; ELF64: lfs ; 
ELF64: blr @@ -134,7 +166,7 @@ entry: define double @ret18() nounwind uwtable ssp { entry: -; ELF64: ret18 +; ELF64-LABEL: ret18 ; ELF64: addis ; ELF64: lfd ; ELF64: blr diff --git a/test/CodeGen/PowerPC/ia-mem-r0.ll b/test/CodeGen/PowerPC/ia-mem-r0.ll new file mode 100644 index 000000000000..4ab17edc5b10 --- /dev/null +++ b/test/CodeGen/PowerPC/ia-mem-r0.ll @@ -0,0 +1,94 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; Make sure that we don't generate a std r, 0(0) -- the memory address cannot +; be stored in r0. +; CHECK-LABEL: @test1 +; CHECK-NOT: std {{[0-9]+}}, 0(0) +; CHECK: blr + +define void @test1({ i8*, void (i8*, i8*)* } %fn_arg) { + %fn = alloca { i8*, void (i8*, i8*)* } + %sp = alloca i8*, align 8 + %regs = alloca [18 x i64], align 8 + store { i8*, void (i8*, i8*)* } %fn_arg, { i8*, void (i8*, i8*)* }* %fn + %1 = bitcast [18 x i64]* %regs to i64* + call void asm sideeffect "std 14, $0", "=*m"(i64* %1) + %2 = bitcast [18 x i64]* %regs to i8* + %3 = getelementptr i8* %2, i32 8 + %4 = bitcast i8* %3 to i64* + call void asm sideeffect "std 15, $0", "=*m"(i64* %4) + %5 = bitcast [18 x i64]* %regs to i8* + %6 = getelementptr i8* %5, i32 16 + %7 = bitcast i8* %6 to i64* + call void asm sideeffect "std 16, $0", "=*m"(i64* %7) + %8 = bitcast [18 x i64]* %regs to i8* + %9 = getelementptr i8* %8, i32 24 + %10 = bitcast i8* %9 to i64* + call void asm sideeffect "std 17, $0", "=*m"(i64* %10) + %11 = bitcast [18 x i64]* %regs to i8* + %12 = getelementptr i8* %11, i32 32 + %13 = bitcast i8* %12 to i64* + call void asm sideeffect "std 18, $0", "=*m"(i64* %13) + %14 = bitcast [18 x i64]* %regs to i8* + %15 = getelementptr i8* %14, i32 40 + %16 = bitcast i8* %15 to i64* + call void asm sideeffect "std 19, $0", "=*m"(i64* %16) + %17 = bitcast [18 x i64]* %regs to i8* + %18 = getelementptr i8* %17, i32 48 + %19 = bitcast i8* %18 to i64* + call void asm sideeffect "std 20, $0", 
"=*m"(i64* %19) + %20 = bitcast [18 x i64]* %regs to i8* + %21 = getelementptr i8* %20, i32 56 + %22 = bitcast i8* %21 to i64* + call void asm sideeffect "std 21, $0", "=*m"(i64* %22) + %23 = bitcast [18 x i64]* %regs to i8* + %24 = getelementptr i8* %23, i32 64 + %25 = bitcast i8* %24 to i64* + call void asm sideeffect "std 22, $0", "=*m"(i64* %25) + %26 = bitcast [18 x i64]* %regs to i8* + %27 = getelementptr i8* %26, i32 72 + %28 = bitcast i8* %27 to i64* + call void asm sideeffect "std 23, $0", "=*m"(i64* %28) + %29 = bitcast [18 x i64]* %regs to i8* + %30 = getelementptr i8* %29, i32 80 + %31 = bitcast i8* %30 to i64* + call void asm sideeffect "std 24, $0", "=*m"(i64* %31) + %32 = bitcast [18 x i64]* %regs to i8* + %33 = getelementptr i8* %32, i32 88 + %34 = bitcast i8* %33 to i64* + call void asm sideeffect "std 25, $0", "=*m"(i64* %34) + %35 = bitcast [18 x i64]* %regs to i8* + %36 = getelementptr i8* %35, i32 96 + %37 = bitcast i8* %36 to i64* + call void asm sideeffect "std 26, $0", "=*m"(i64* %37) + %38 = bitcast [18 x i64]* %regs to i8* + %39 = getelementptr i8* %38, i32 104 + %40 = bitcast i8* %39 to i64* + call void asm sideeffect "std 27, $0", "=*m"(i64* %40) + %41 = bitcast [18 x i64]* %regs to i8* + %42 = getelementptr i8* %41, i32 112 + %43 = bitcast i8* %42 to i64* + call void asm sideeffect "std 28, $0", "=*m"(i64* %43) + %44 = bitcast [18 x i64]* %regs to i8* + %45 = getelementptr i8* %44, i32 120 + %46 = bitcast i8* %45 to i64* + call void asm sideeffect "std 29, $0", "=*m"(i64* %46) + %47 = bitcast [18 x i64]* %regs to i8* + %48 = getelementptr i8* %47, i32 128 + %49 = bitcast i8* %48 to i64* + call void asm sideeffect "std 30, $0", "=*m"(i64* %49) + %50 = bitcast [18 x i64]* %regs to i8* + %51 = getelementptr i8* %50, i32 136 + %52 = bitcast i8* %51 to i64* + call void asm sideeffect "std 31, $0", "=*m"(i64* %52) + %53 = getelementptr { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 1 + %.funcptr = load void (i8*, i8*)** %53 + %54 = getelementptr 
{ i8*, void (i8*, i8*)* }* %fn, i32 0, i32 0 + %.ptr = load i8** %54 + %55 = load i8** %sp + call void %.funcptr(i8* %.ptr, i8* %55) + ret void +} + diff --git a/test/CodeGen/PowerPC/ia-neg-const.ll b/test/CodeGen/PowerPC/ia-neg-const.ll new file mode 100644 index 000000000000..165fc1339d0b --- /dev/null +++ b/test/CodeGen/PowerPC/ia-neg-const.ll @@ -0,0 +1,25 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1 + +; Function Attrs: nounwind +define i64 @main() #0 { +entry: + %x = alloca i64, align 8 + store i64 0, i64* %x, align 8 + %0 = call i64 asm sideeffect "ld $0,$1\0A\09add${2:I} $0,$0,$2", "=&r,*m,Ir"(i64* %x, i64 -1) #0 + ret i64 %0 +} + +; CHECK: ld +; CHECK-NOT: addi 3,3,4294967295 +; CHECK: addi 3,3,-1 +; CHECK: blr + +; Function Attrs: nounwind +declare signext i32 @printf(i8* nocapture readonly, ...) #0 + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/PowerPC/stack-realign.ll b/test/CodeGen/PowerPC/stack-realign.ll index a59fceb5bdd0..762f50a9cbe0 100644 --- a/test/CodeGen/PowerPC/stack-realign.ll +++ b/test/CodeGen/PowerPC/stack-realign.ll @@ -37,6 +37,7 @@ entry: ; CHECK-DAG: subfic 0, [[REG]], -160 ; CHECK: stdux 1, 1, 0 +; CHECK: .cfi_def_cfa_register r30 ; CHECK: .cfi_offset r30, -16 ; CHECK: .cfi_offset lr, 16 @@ -59,6 +60,7 @@ entry: ; CHECK-FP-DAG: subfic 0, [[REG]], -160 ; CHECK-FP: stdux 1, 1, 0 +; CHECK-FP: .cfi_def_cfa_register r30 ; CHECK-FP: .cfi_offset r31, -8 ; CHECK-FP: .cfi_offset r30, -16 ; CHECK-FP: .cfi_offset lr, 16 @@ -120,6 +122,8 @@ entry: ; CHECK-DAG: subfc 0, [[REG3]], [[REG2]] ; CHECK: stdux 1, 1, 0 +; CHECK: .cfi_def_cfa_register r30 + ; CHECK: blr ; CHECK-32-LABEL: @hoo @@ -178,6 +182,8 @@ entry: ; CHECK-DAG: subfic 0, [[REG]], -192 ; CHECK: stdux 1, 1, 0 +; CHECK: .cfi_def_cfa_register r30 + ; CHECK: stfd 30, -16(30) ; CHECK: blr @@ -193,6 +199,8 @@ entry: ; 
CHECK-FP-DAG: subfic 0, [[REG]], -192 ; CHECK-FP: stdux 1, 1, 0 +; CHECK-FP: .cfi_def_cfa_register r30 + ; CHECK-FP: stfd 30, -16(30) ; CHECK-FP: blr diff --git a/test/CodeGen/PowerPC/subreg-postra-2.ll b/test/CodeGen/PowerPC/subreg-postra-2.ll new file mode 100644 index 000000000000..2faaa6129294 --- /dev/null +++ b/test/CodeGen/PowerPC/subreg-postra-2.ll @@ -0,0 +1,175 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @jbd2_journal_commit_transaction() #0 { +entry: + br i1 undef, label %do.body, label %if.then5 + +if.then5: ; preds = %entry + unreachable + +do.body: ; preds = %entry + br i1 undef, label %do.body.i, label %trace_jbd2_start_commit.exit + +do.body.i: ; preds = %do.body + unreachable + +trace_jbd2_start_commit.exit: ; preds = %do.body + br i1 undef, label %do.body.i1116, label %trace_jbd2_commit_locking.exit + +do.body.i1116: ; preds = %trace_jbd2_start_commit.exit + unreachable + +trace_jbd2_commit_locking.exit: ; preds = %trace_jbd2_start_commit.exit + br i1 undef, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %trace_jbd2_commit_locking.exit + unreachable + +while.end: ; preds = %trace_jbd2_commit_locking.exit + br i1 undef, label %spin_unlock.exit1146, label %if.then.i.i.i.i1144 + +if.then.i.i.i.i1144: ; preds = %while.end + unreachable + +spin_unlock.exit1146: ; preds = %while.end + br i1 undef, label %spin_unlock.exit1154, label %if.then.i.i.i.i1152 + +if.then.i.i.i.i1152: ; preds = %spin_unlock.exit1146 + unreachable + +spin_unlock.exit1154: ; preds = %spin_unlock.exit1146 + br i1 undef, label %do.body.i1159, label %trace_jbd2_commit_flushing.exit + +do.body.i1159: ; preds = %spin_unlock.exit1154 + br i1 undef, label %if.end.i1166, label %do.body5.i1165 + +do.body5.i1165: ; preds = %do.body.i1159 + unreachable + +if.end.i1166: ; preds = %do.body.i1159 + unreachable + 
+trace_jbd2_commit_flushing.exit: ; preds = %spin_unlock.exit1154 + br i1 undef, label %for.end.i, label %for.body.lr.ph.i + +for.body.lr.ph.i: ; preds = %trace_jbd2_commit_flushing.exit + unreachable + +for.end.i: ; preds = %trace_jbd2_commit_flushing.exit + br i1 undef, label %journal_submit_data_buffers.exit, label %if.then.i.i.i.i31.i + +if.then.i.i.i.i31.i: ; preds = %for.end.i + br label %journal_submit_data_buffers.exit + +journal_submit_data_buffers.exit: ; preds = %if.then.i.i.i.i31.i, %for.end.i + br i1 undef, label %if.end103, label %if.then102 + +if.then102: ; preds = %journal_submit_data_buffers.exit + unreachable + +if.end103: ; preds = %journal_submit_data_buffers.exit + br i1 undef, label %do.body.i1182, label %trace_jbd2_commit_logging.exit + +do.body.i1182: ; preds = %if.end103 + br i1 undef, label %if.end.i1189, label %do.body5.i1188 + +do.body5.i1188: ; preds = %do.body5.i1188, %do.body.i1182 + br i1 undef, label %if.end.i1189, label %do.body5.i1188 + +if.end.i1189: ; preds = %do.body5.i1188, %do.body.i1182 + unreachable + +trace_jbd2_commit_logging.exit: ; preds = %if.end103 + br label %while.cond129.outer1451 + +while.cond129.outer1451: ; preds = %start_journal_io, %trace_jbd2_commit_logging.exit + br label %while.cond129 + +while.cond129: ; preds = %if.then135, %while.cond129.outer1451 + br i1 undef, label %while.end246, label %if.then135 + +if.then135: ; preds = %while.cond129 + br i1 undef, label %start_journal_io, label %while.cond129 + +start_journal_io: ; preds = %if.then135 + br label %while.cond129.outer1451 + +while.end246: ; preds = %while.cond129 + br i1 undef, label %for.end.i1287, label %for.body.i1277 + +for.body.i1277: ; preds = %while.end246 + unreachable + +for.end.i1287: ; preds = %while.end246 + br i1 undef, label %journal_finish_inode_data_buffers.exit, label %if.then.i.i.i.i84.i + +if.then.i.i.i.i84.i: ; preds = %for.end.i1287 + unreachable + +journal_finish_inode_data_buffers.exit: ; preds = %for.end.i1287 + br i1 undef, 
label %if.end256, label %if.then249 + +if.then249: ; preds = %journal_finish_inode_data_buffers.exit + unreachable + +if.end256: ; preds = %journal_finish_inode_data_buffers.exit + br label %while.body318 + +while.body318: ; preds = %wait_on_buffer.exit, %if.end256 + br i1 undef, label %wait_on_buffer.exit, label %if.then.i1296 + +if.then.i1296: ; preds = %while.body318 + br label %wait_on_buffer.exit + +wait_on_buffer.exit: ; preds = %if.then.i1296, %while.body318 + br i1 undef, label %do.body378, label %while.body318 + +do.body378: ; preds = %wait_on_buffer.exit + br i1 undef, label %while.end418, label %while.body392.lr.ph + +while.body392.lr.ph: ; preds = %do.body378 + br label %while.body392 + +while.body392: ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph + %0 = load i8** undef, align 8 + %add.ptr399 = getelementptr inbounds i8* %0, i64 -72 + %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64* + %tobool.i1316 = icmp eq i64 undef, 0 + br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317 + +if.then.i1317: ; preds = %while.body392 + unreachable + +wait_on_buffer.exit1319: ; preds = %while.body392 + %1 = load volatile i64* %b_state.i.i1314, align 8 + %conv.i.i1322 = and i64 %1, 1 + %lnot404 = icmp eq i64 %conv.i.i1322, 0 + %.err.4 = select i1 %lnot404, i32 -5, i32 undef + %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. 
$0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #0 + store i8* %0, i8** undef, align 8 + %cmp.i1312 = icmp eq i32* undef, undef + br i1 %cmp.i1312, label %while.end418, label %while.body392 + +while.end418: ; preds = %wait_on_buffer.exit1319, %do.body378 + %err.4.lcssa = phi i32 [ undef, %do.body378 ], [ %.err.4, %wait_on_buffer.exit1319 ] + %tobool419 = icmp eq i32 %err.4.lcssa, 0 + br i1 %tobool419, label %if.end421, label %if.then420 + +; CHECK-LABEL: @jbd2_journal_commit_transaction +; CHECK: andi. +; CHECK: cror [[REG:[0-9]+]], 1, 1 +; CHECK: stdcx. +; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]] + +if.then420: ; preds = %while.end418 + unreachable + +if.end421: ; preds = %while.end418 + unreachable +} + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/PowerPC/subreg-postra.ll b/test/CodeGen/PowerPC/subreg-postra.ll new file mode 100644 index 000000000000..b10fa668cb8d --- /dev/null +++ b/test/CodeGen/PowerPC/subreg-postra.ll @@ -0,0 +1,168 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @jbd2_journal_commit_transaction(i32* %journal) #0 { +entry: + br i1 undef, label %do.body, label %if.then5 + +if.then5: ; preds = %entry + unreachable + +do.body: ; preds = %entry + br i1 undef, label %do.body.i, label %trace_jbd2_start_commit.exit + +do.body.i: ; preds = %do.body + unreachable + +trace_jbd2_start_commit.exit: ; preds = %do.body + br i1 undef, label %do.body.i1116, label %trace_jbd2_commit_locking.exit + +do.body.i1116: ; preds = %trace_jbd2_start_commit.exit + br i1 undef, label %if.end.i1123, label %do.body5.i1122 + +do.body5.i1122: ; preds = %do.body.i1116 + unreachable + +if.end.i1123: ; preds = %do.body.i1116 + br label %trace_jbd2_commit_locking.exit + +trace_jbd2_commit_locking.exit: ; preds = %if.end.i1123, 
%trace_jbd2_start_commit.exit + br i1 undef, label %spin_unlock.exit1146, label %if.then.i.i.i.i1144 + +if.then.i.i.i.i1144: ; preds = %trace_jbd2_commit_locking.exit + unreachable + +spin_unlock.exit1146: ; preds = %trace_jbd2_commit_locking.exit + br i1 undef, label %spin_unlock.exit1154, label %if.then.i.i.i.i1152 + +if.then.i.i.i.i1152: ; preds = %spin_unlock.exit1146 + br label %spin_unlock.exit1154 + +spin_unlock.exit1154: ; preds = %if.then.i.i.i.i1152, %spin_unlock.exit1146 + br i1 undef, label %do.body.i1159, label %trace_jbd2_commit_flushing.exit + +do.body.i1159: ; preds = %spin_unlock.exit1154 + unreachable + +trace_jbd2_commit_flushing.exit: ; preds = %spin_unlock.exit1154 + br i1 undef, label %for.end.i, label %for.body.lr.ph.i + +for.body.lr.ph.i: ; preds = %trace_jbd2_commit_flushing.exit + br i1 undef, label %spin_unlock.exit.i, label %if.then.i.i.i.i.i + +if.then.i.i.i.i.i: ; preds = %for.body.lr.ph.i + unreachable + +spin_unlock.exit.i: ; preds = %for.body.lr.ph.i + unreachable + +for.end.i: ; preds = %trace_jbd2_commit_flushing.exit + br i1 undef, label %journal_submit_data_buffers.exit, label %if.then.i.i.i.i31.i + +if.then.i.i.i.i31.i: ; preds = %for.end.i + unreachable + +journal_submit_data_buffers.exit: ; preds = %for.end.i + br i1 undef, label %if.end103, label %if.then102 + +if.then102: ; preds = %journal_submit_data_buffers.exit + unreachable + +if.end103: ; preds = %journal_submit_data_buffers.exit + br i1 undef, label %do.body.i1182, label %trace_jbd2_commit_logging.exit + +do.body.i1182: ; preds = %if.end103 + unreachable + +trace_jbd2_commit_logging.exit: ; preds = %if.end103 + br i1 undef, label %for.end.i1287, label %for.body.i1277 + +for.body.i1277: ; preds = %trace_jbd2_commit_logging.exit + unreachable + +for.end.i1287: ; preds = %trace_jbd2_commit_logging.exit + br i1 undef, label %journal_finish_inode_data_buffers.exit, label %if.then.i.i.i.i84.i + +if.then.i.i.i.i84.i: ; preds = %for.end.i1287 + unreachable + 
+journal_finish_inode_data_buffers.exit: ; preds = %for.end.i1287 + br i1 undef, label %if.end256, label %if.then249 + +if.then249: ; preds = %journal_finish_inode_data_buffers.exit + unreachable + +if.end256: ; preds = %journal_finish_inode_data_buffers.exit + br i1 undef, label %do.body277, label %if.then260 + +if.then260: ; preds = %if.end256 + br label %do.body277 + +do.body277: ; preds = %if.then260, %if.end256 + br label %while.body318 + +while.body318: ; preds = %wait_on_buffer.exit, %do.body277 + %tobool.i1295 = icmp eq i64 undef, 0 + br i1 %tobool.i1295, label %wait_on_buffer.exit, label %if.then.i1296 + +if.then.i1296: ; preds = %while.body318 + unreachable + +wait_on_buffer.exit: ; preds = %while.body318 + br i1 undef, label %do.body378, label %while.body318 + +do.body378: ; preds = %wait_on_buffer.exit + br i1 undef, label %while.end418, label %while.body392.lr.ph + +while.body392.lr.ph: ; preds = %do.body378 + br label %while.body392 + +while.body392: ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph + %0 = load i8** undef, align 8 + %add.ptr399 = getelementptr inbounds i8* %0, i64 -72 + %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64* + %tobool.i1316 = icmp eq i64 undef, 0 + br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317 + +if.then.i1317: ; preds = %while.body392 + unreachable + +wait_on_buffer.exit1319: ; preds = %while.body392 + %1 = load volatile i64* %b_state.i.i1314, align 8 + %conv.i.i1322 = and i64 %1, 1 + %lnot404 = icmp eq i64 %conv.i.i1322, 0 + %.err.4 = select i1 %lnot404, i32 -5, i32 undef + %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. 
$0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #1 + %prev.i.i.i1325 = getelementptr inbounds i8* %0, i64 8 + %3 = load i32** null, align 8 + store i32* %3, i32** undef, align 8 + call void @__brelse(i32* undef) #1 + br i1 undef, label %while.end418, label %while.body392 + +; CHECK-LABEL: @jbd2_journal_commit_transaction +; CHECK: andi. +; CHECK: cror [[REG:[0-9]+]], 1, 1 +; CHECK: stdcx. +; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]] + +while.end418: ; preds = %wait_on_buffer.exit1319, %do.body378 + %err.4.lcssa = phi i32 [ undef, %do.body378 ], [ %.err.4, %wait_on_buffer.exit1319 ] + br i1 undef, label %if.end421, label %if.then420 + +if.then420: ; preds = %while.end418 + call void @jbd2_journal_abort(i32* %journal, i32 signext %err.4.lcssa) #1 + br label %if.end421 + +if.end421: ; preds = %if.then420, %while.end418 + unreachable +} + +declare void @jbd2_journal_abort(i32*, i32 signext) + +declare void @__brelse(i32*) + +attributes #0 = { nounwind } +attributes #1 = { nounwind } + diff --git a/test/CodeGen/PowerPC/tls-pic.ll b/test/CodeGen/PowerPC/tls-pic.ll index 9f3ab6e3b491..9ba372591e6e 100644 --- a/test/CodeGen/PowerPC/tls-pic.ll +++ b/test/CodeGen/PowerPC/tls-pic.ll @@ -1,5 +1,7 @@ ; RUN: llc -march=ppc64 -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0 %s ; RUN: llc -march=ppc64 -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1 %s +; RUN: llc -march=ppc32 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0-32 %s +; RUN: llc -march=ppc32 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1-32 %s target triple = "powerpc64-unknown-linux-gnu" ; Test correct assembly code generation for thread-local storage using @@ -22,6 +24,16 @@ entry: ; OPT0-NEXT: nop ; OPT0: addis [[REG2:[0-9]+]], 3, a@dtprel@ha ; OPT0-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l +; OPT0-32-LABEL: main +; OPT0-32: addi 
{{[0-9]+}}, {{[0-9]+}}, a@got@tlsld +; OPT0-32: bl __tls_get_addr(a@tlsld)@PLT +; OPT0-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha +; OPT0-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l +; OPT1-32-LABEL: main +; OPT1-32: addi 3, {{[0-9]+}}, a@got@tlsld +; OPT1-32: bl __tls_get_addr(a@tlsld)@PLT +; OPT1-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha +; OPT1-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l ; Test peephole optimization for thread-local storage using the ; local dynamic model. @@ -52,4 +64,6 @@ entry: ; OPT1-NEXT: addi 3, [[REG]], a2@got@tlsgd@l ; OPT1: bl __tls_get_addr(a2@tlsgd) ; OPT1-NEXT: nop - +; OPT1-32-LABEL: main2 +; OPT1-32: addi 3, {{[0-9]+}}, a2@got@tlsgd +; OPT1-32: bl __tls_get_addr(a2@tlsgd)@PLT diff --git a/test/CodeGen/PowerPC/tls-store2.ll b/test/CodeGen/PowerPC/tls-store2.ll new file mode 100644 index 000000000000..f884dd8a0a17 --- /dev/null +++ b/test/CodeGen/PowerPC/tls-store2.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; Test back-to-back stores of TLS variables to ensure call sequences no +; longer overlap. 
+ +@__once_callable = external thread_local global i8** +@__once_call = external thread_local global void ()* + +define i64 @call_once(i64 %flag, i8* %ptr) { +entry: + %var = alloca i8*, align 8 + store i8* %ptr, i8** %var, align 8 + store i8** %var, i8*** @__once_callable, align 8 + store void ()* @__once_call_impl, void ()** @__once_call, align 8 + ret i64 %flag +} + +; CHECK-LABEL: call_once: +; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha +; CHECK: addi 3, 3, __once_callable@got@tlsgd@l +; CHECK: bl __tls_get_addr(__once_callable@tlsgd) +; CHECK-NEXT: nop +; CHECK: std {{[0-9]+}}, 0(3) +; CHECK: addis 3, 2, __once_call@got@tlsgd@ha +; CHECK: addi 3, 3, __once_call@got@tlsgd@l +; CHECK: bl __tls_get_addr(__once_call@tlsgd) +; CHECK-NEXT: nop +; CHECK: std {{[0-9]+}}, 0(3) + +declare void @__once_call_impl() |