diff options
Diffstat (limited to 'test/CodeGen/X86')
45 files changed, 833 insertions, 117 deletions
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index 89b127cccf82..bdbe713a2956 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -10,9 +10,8 @@ entry: cond_true: ; preds = %cond_true, %entry %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] %tmp. = shl i32 %indvar, 2 ; <i32> [#uses=1] - %tmp.10 = add i32 %tmp., 1 ; <i32> [#uses=2] - %k.0.0 = bitcast i32 %tmp.10 to i32 ; <i32> [#uses=2] - %tmp31 = add i32 %k.0.0, -1 ; <i32> [#uses=4] + %tmp.10 = add nsw i32 %tmp., 1 ; <i32> [#uses=2] + %tmp31 = add nsw i32 %tmp.10, -1 ; <i32> [#uses=4] %tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1] %tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; <i8*> [#uses=1] %tmp = load <16 x i8>* %tmp34, align 1 @@ -37,14 +36,13 @@ cond_true: ; preds = %cond_true, %entry %tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2 ; <<2 x i64>> [#uses=1] %tmp121 = and <2 x i64> %tmp99.upgrd.5, %tmp88.upgrd.4 ; <<2 x i64>> [#uses=1] %tmp131 = or <2 x i64> %tmp121, %tmp111 ; <<2 x i64>> [#uses=1] - %gep.upgrd.6 = zext i32 %tmp.10 to i64 ; <i64> [#uses=1] - %tmp137 = getelementptr i32* %mc, i64 %gep.upgrd.6 ; <i32*> [#uses=1] + %tmp137 = getelementptr i32* %mc, i32 %tmp.10 ; <i32*> [#uses=1] %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1] store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7 - %tmp147 = add i32 %k.0.0, 8 ; <i32> [#uses=1] - %tmp.upgrd.8 = icmp sgt i32 %tmp147, %M ; <i1> [#uses=1] + %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1] + %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1] %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] - br i1 %tmp.upgrd.8, label %return, label %cond_true + br i1 %tmp.upgrd.8, label %cond_true, label %return return: ; preds = %cond_true, %entry ret void diff --git a/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll new file mode 100644 
index 000000000000..de226a140ad1 --- /dev/null +++ b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll @@ -0,0 +1,19 @@ +; RUN: llc %s -o - -march=x86-64 | grep {(%rdi,%rax,8)} +; RUN: llc %s -o - -march=x86-64 | not grep {addq.*8} + +define void @foo(double* %y) nounwind { +entry: + br label %bb + +bb: + %i = phi i64 [ 0, %entry ], [ %k, %bb ] + %j = getelementptr double* %y, i64 %i + store double 0.000000e+00, double* %j + %k = add i64 %i, 1 + %n = icmp eq i64 %k, 0 + br i1 %n, label %return, label %bb + +return: + ret void +} + diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll index 93e880854985..10bbe7442007 100644 --- a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll +++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=x86 | grep {leal 3(,%eax,8)} ;; This example can't fold the or into an LEA. -define i32 @test(float ** %tmp2, i32 %tmp12) { +define i32 @test(float ** %tmp2, i32 %tmp12) nounwind { %tmp3 = load float** %tmp2 %tmp132 = shl i32 %tmp12, 2 ; <i32> [#uses=1] %tmp4 = bitcast float* %tmp3 to i8* ; <i8*> [#uses=1] @@ -14,7 +14,7 @@ define i32 @test(float ** %tmp2, i32 %tmp12) { ;; This can! 
-define i32 @test2(i32 %a, i32 %b) { +define i32 @test2(i32 %a, i32 %b) nounwind { %c = shl i32 %a, 3 %d = or i32 %c, 3 ret i32 %d diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index cb1b1efae3e2..a4d642b40354 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -6,13 +6,13 @@ target triple = "x86_64-apple-darwin10.0" %struct.__Rec = type opaque %struct.__vv = type { } -define %struct.__vv* @t(%struct.Key* %desc) nounwind ssp { +define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp { entry: br label %bb4 bb4: ; preds = %bb.i, %bb26, %bb4, %entry %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0] - %ins = or i64 0, 0 ; <i64> [#uses=1] + %ins = or i64 %p, 2097152 ; <i64> [#uses=1] %1 = call i32 (...)* @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; <i32> [#uses=1] %cond = icmp eq i32 %1, 1 ; <i1> [#uses=1] br i1 %cond, label %bb26, label %bb4 diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll index 8a0b244a23fa..3cd54169745d 100644 --- a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll +++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll @@ -1,12 +1,18 @@ -; RUN: llc < %s | grep -E {sar|shl|mov|or} | count 4 +; RUN: llc < %s | FileCheck %s + ; Check that the shr(shl X, 56), 48) is not mistakenly turned into ; a shr (X, -8) that gets subsequently "optimized away" as undef ; PR4254 + target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" define i64 @foo(i64 %b) nounwind readnone { entry: +; CHECK: foo: +; CHECK: shlq $56, %rdi +; CHECK: sarq $48, %rdi +; CHECK: leaq 1(%rdi), %rax %shl = shl i64 %b, 56 ; <i64> [#uses=1] %shr = ashr i64 %shl, 48 ; <i64> [#uses=1] %add5 = or i64 %shr, 1 ; <i64> [#uses=1] diff 
--git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll index a7c202076da8..8f274df918d1 100644 --- a/test/CodeGen/X86/2009-11-16-MachineLICM.ll +++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll @@ -10,7 +10,7 @@ entry: br i1 %0, label %bb.nph, label %return bb.nph: ; preds = %entry -; CHECK: movq _g@GOTPCREL(%rip), %rcx +; CHECK: movq _g@GOTPCREL(%rip), [[REG:%[a-z]+]] %tmp = zext i32 %n to i64 ; <i64> [#uses=1] br label %bb diff --git a/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll b/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll new file mode 100644 index 000000000000..e7004e28752e --- /dev/null +++ b/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 +; <rdar://problem/7499313> +target triple = "i686-apple-darwin8" + +declare void @func2(i16 zeroext) + +define void @func1() nounwind { +entry: + %t1 = icmp ne i8 undef, 0 + %t2 = icmp eq i8 undef, 14 + %t3 = and i1 %t1, %t2 + %t4 = select i1 %t3, i16 0, i16 128 + call void @func2(i16 zeroext %t4) nounwind + ret void +} diff --git a/test/CodeGen/X86/2010-01-07-ISelBug.ll b/test/CodeGen/X86/2010-01-07-ISelBug.ll new file mode 100644 index 000000000000..081fab7facfe --- /dev/null +++ b/test/CodeGen/X86/2010-01-07-ISelBug.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 +; rdar://r7519827 + +define i32 @t() nounwind ssp { +entry: + br label %if.end.i11 + +if.end.i11: ; preds = %lor.lhs.false.i10, %lor.lhs.false.i10, %lor.lhs.false.i10 + br i1 undef, label %for.body161, label %for.end197 + +for.body161: ; preds = %if.end.i11 + br label %for.end197 + +for.end197: ; preds = %for.body161, %if.end.i11 + %mlucEntry.4 = phi i96 [ undef, %for.body161 ], [ undef, %if.end.i11 ] ; <i96> [#uses=2] + store i96 %mlucEntry.4, i96* undef, align 8 + %tmp172 = lshr i96 %mlucEntry.4, 64 ; <i96> [#uses=1] + %tmp173 = trunc i96 %tmp172 to i32 ; <i32> [#uses=1] + %tmp1.i1.i = call i32 @llvm.bswap.i32(i32 %tmp173) nounwind ; <i32> [#uses=1] + store i32 
%tmp1.i1.i, i32* undef, align 8 + unreachable + +if.then283: ; preds = %lor.lhs.false.i10, %do.end105, %for.end + ret i32 undef +} + +declare i32 @llvm.bswap.i32(i32) nounwind readnone diff --git a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll new file mode 100644 index 000000000000..3728f15d969c --- /dev/null +++ b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll @@ -0,0 +1,11 @@ +; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s +; CHECK: addps ( + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind { + %A = load <4 x float>* %P, align 4 + %B = add <4 x float> %A, %In + ret <4 x float> %B +} diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll new file mode 100644 index 000000000000..172e1c73d568 --- /dev/null +++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; rdar://r7512579 + +; PHI defs in the atomic loop should be used by the add / adc +; instructions. They should not be dead. 
+ +define void @t(i64* nocapture %p) nounwind ssp { +entry: +; CHECK: t: +; CHECK: movl $1 +; CHECK: movl (%ebp), %eax +; CHECK: movl 4(%ebp), %edx +; CHECK: LBB1_1: +; CHECK-NOT: movl $1 +; CHECK-NOT: movl $0 +; CHECK: addl +; CHECK: adcl +; CHECK: lock +; CHECK: cmpxchg8b +; CHECK: jne + tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) + %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0] + tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind diff --git a/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll b/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll new file mode 100644 index 000000000000..db98eef30e1e --- /dev/null +++ b/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll @@ -0,0 +1,97 @@ +; RUN: llc -verify-machineinstrs < %s +; +; The lowering of a switch combined with constant folding would leave spurious extra arguments on a PHI instruction. 
+; +target triple = "x86_64-apple-darwin10" + +define void @foo() { + br label %cond_true813.i + +cond_true813.i: ; preds = %0 + br i1 false, label %cond_true818.i, label %cond_next1146.i + +cond_true818.i: ; preds = %cond_true813.i + br i1 false, label %recog_memoized.exit52, label %cond_next1146.i + +recog_memoized.exit52: ; preds = %cond_true818.i + switch i32 0, label %bb886.i.preheader [ + i32 0, label %bb907.i + i32 44, label %bb866.i + i32 103, label %bb874.i + i32 114, label %bb874.i + ] + +bb857.i: ; preds = %bb886.i, %bb866.i + %tmp862.i494.24 = phi i8* [ null, %bb866.i ], [ %tmp862.i494.26, %bb886.i ] ; <i8*> [#uses=1] + switch i32 0, label %bb886.i.preheader [ + i32 0, label %bb907.i + i32 44, label %bb866.i + i32 103, label %bb874.i + i32 114, label %bb874.i + ] + +bb866.i.loopexit: ; preds = %bb874.i + br label %bb866.i + +bb866.i.loopexit31: ; preds = %cond_true903.i + br label %bb866.i + +bb866.i: ; preds = %bb866.i.loopexit31, %bb866.i.loopexit, %bb857.i, %recog_memoized.exit52 + br i1 false, label %bb907.i, label %bb857.i + +bb874.i.preheader.loopexit: ; preds = %cond_true903.i, %cond_true903.i + ret void + +bb874.i: ; preds = %bb857.i, %bb857.i, %recog_memoized.exit52, %recog_memoized.exit52 + switch i32 0, label %bb886.i.preheader.loopexit [ + i32 0, label %bb907.i + i32 44, label %bb866.i.loopexit + i32 103, label %bb874.i.backedge + i32 114, label %bb874.i.backedge + ] + +bb874.i.backedge: ; preds = %bb874.i, %bb874.i + ret void + +bb886.i.preheader.loopexit: ; preds = %bb874.i + ret void + +bb886.i.preheader: ; preds = %bb857.i, %recog_memoized.exit52 + %tmp862.i494.26 = phi i8* [ undef, %recog_memoized.exit52 ], [ %tmp862.i494.24, %bb857.i ] ; <i8*> [#uses=1] + br label %bb886.i + +bb886.i: ; preds = %cond_true903.i, %bb886.i.preheader + br i1 false, label %bb857.i, label %cond_true903.i + +cond_true903.i: ; preds = %bb886.i + switch i32 0, label %bb886.i [ + i32 0, label %bb907.i + i32 44, label %bb866.i.loopexit31 + i32 103, label 
%bb874.i.preheader.loopexit + i32 114, label %bb874.i.preheader.loopexit + ] + +bb907.i: ; preds = %cond_true903.i, %bb874.i, %bb866.i, %bb857.i, %recog_memoized.exit52 + br i1 false, label %cond_next1146.i, label %cond_true910.i + +cond_true910.i: ; preds = %bb907.i + ret void + +cond_next1146.i: ; preds = %bb907.i, %cond_true818.i, %cond_true813.i + ret void + +bb2060.i: ; No predecessors! + br i1 false, label %cond_true2064.i, label %bb2067.i + +cond_true2064.i: ; preds = %bb2060.i + unreachable + +bb2067.i: ; preds = %bb2060.i + ret void + +cond_next3473: ; No predecessors! + ret void + +cond_next3521: ; No predecessors! + ret void +} diff --git a/test/CodeGen/X86/2010-01-13-OptExtBug.ll b/test/CodeGen/X86/2010-01-13-OptExtBug.ll new file mode 100644 index 000000000000..d49e2a8d0798 --- /dev/null +++ b/test/CodeGen/X86/2010-01-13-OptExtBug.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=i386-pc-linux-gnu +; PR6027 + +%class.OlsonTimeZone = type { i16, i32*, i8*, i16 } + +define void @XX(%class.OlsonTimeZone* %this) align 2 { +entry: + %call = tail call i8* @_Z15uprv_malloc_4_2v() + %0 = bitcast i8* %call to double* + %tmp = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 3 + %tmp2 = load i16* %tmp + %tmp525 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0 + %tmp626 = load i16* %tmp525 + %cmp27 = icmp slt i16 %tmp2, %tmp626 + br i1 %cmp27, label %bb.nph, label %for.end + +for.cond: + %tmp6 = load i16* %tmp5 + %cmp = icmp slt i16 %inc, %tmp6 + %indvar.next = add i32 %indvar, 1 + br i1 %cmp, label %for.body, label %for.end + +bb.nph: + %tmp10 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 2 + %tmp17 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 1 + %tmp5 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0 + %tmp29 = sext i16 %tmp2 to i32 + %tmp31 = add i16 %tmp2, 1 + %tmp32 = zext i16 %tmp31 to i32 + br label %for.body + +for.body: + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, 
%for.cond ] + %tmp30 = add i32 %indvar, %tmp29 + %tmp33 = add i32 %indvar, %tmp32 + %inc = trunc i32 %tmp33 to i16 + %tmp11 = load i8** %tmp10 + %arrayidx = getelementptr i8* %tmp11, i32 %tmp30 + %tmp12 = load i8* %arrayidx + br label %for.cond + +for.end: + ret void +} + +declare i8* @_Z15uprv_malloc_4_2v() diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll new file mode 100644 index 000000000000..30a1f36850de --- /dev/null +++ b/test/CodeGen/X86/3addr-or.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7527734 + +define i32 @test(i32 %x) nounwind readnone ssp { +entry: +; CHECK: test: +; CHECK: leal 3(%rdi), %eax + %0 = shl i32 %x, 5 ; <i32> [#uses=1] + %1 = or i32 %0, 3 ; <i32> [#uses=1] + ret i32 %1 +} + +define i64 @test2(i8 %A, i8 %B) nounwind { +; CHECK: test2: +; CHECK: shrq $4 +; CHECK-NOT: movq +; CHECK-NOT: orq +; CHECK: leaq +; CHECK: ret + %C = zext i8 %A to i64 ; <i64> [#uses=1] + %D = shl i64 %C, 4 ; <i64> [#uses=1] + %E = and i64 %D, 48 ; <i64> [#uses=1] + %F = zext i8 %B to i64 ; <i64> [#uses=1] + %G = lshr i64 %F, 4 ; <i64> [#uses=1] + %H = or i64 %G, %E ; <i64> [#uses=1] + ret i64 %H +} diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll new file mode 100644 index 000000000000..547d6b57657a --- /dev/null +++ b/test/CodeGen/X86/addr-label-difference.ll @@ -0,0 +1,22 @@ +; RUN: llc %s -o - | grep {__TEXT,__const} +; PR5929 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0" + +; This array should go into the __TEXT,__const section, not into the +; __DATA,__const section, because the elements don't need relocations. 
+@test.array = internal constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1] + +define void @test(i32 %i) nounwind ssp { +entry: + br label %foo + +foo: ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto + br label %bar + +bar: ; preds = %foo, %indirectgoto + br label %hack + +hack: ; preds = %bar, %indirectgoto + ret void +} diff --git a/test/CodeGen/X86/and-su.ll b/test/CodeGen/X86/and-su.ll index b5ac23b24128..38db88af12c2 100644 --- a/test/CodeGen/X86/and-su.ll +++ b/test/CodeGen/X86/and-su.ll @@ -1,16 +1,53 @@ -; RUN: llc < %s -march=x86 | grep {(%} | count 1 +; RUN: llc < %s -march=x86 | FileCheck %s ; Don't duplicate the load. 
define fastcc i32 @foo(i32* %p) nounwind { +; CHECK: foo: +; CHECK: andl $10, %eax +; CHECK: je %t0 = load i32* %p %t2 = and i32 %t0, 10 %t3 = icmp ne i32 %t2, 0 br i1 %t3, label %bb63, label %bb76 - bb63: ret i32 %t2 - bb76: ret i32 0 } + +define fastcc double @bar(i32 %hash, double %x, double %y) nounwind { +entry: +; CHECK: bar: + %0 = and i32 %hash, 15 + %1 = icmp ult i32 %0, 8 + br i1 %1, label %bb11, label %bb10 + +bb10: +; CHECK: bb10 +; CHECK: testb $1 + %2 = and i32 %hash, 1 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %bb13, label %bb11 + +bb11: + %4 = fsub double -0.000000e+00, %x + br label %bb13 + +bb13: +; CHECK: bb13 +; CHECK: testb $2 + %iftmp.9.0 = phi double [ %4, %bb11 ], [ %x, %bb10 ] + %5 = and i32 %hash, 2 + %6 = icmp eq i32 %5, 0 + br i1 %6, label %bb16, label %bb14 + +bb14: + %7 = fsub double -0.000000e+00, %y + br label %bb16 + +bb16: + %iftmp.10.0 = phi double [ %7, %bb14 ], [ %y, %bb13 ] + %8 = fadd double %iftmp.9.0, %iftmp.10.0 + ret double %8 +} diff --git a/test/CodeGen/X86/anyext-uses.ll b/test/CodeGen/X86/anyext-uses.ll deleted file mode 100644 index 0cf169eb28d8..000000000000 --- a/test/CodeGen/X86/anyext-uses.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: llc < %s -march=x86-64 > %t -; RUN: grep mov %t | count 8 -; RUN: not grep implicit %t - -; Avoid partial register updates; don't define an i8 register and read -; the i32 super-register. 
- -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-apple-darwin9.6" - %struct.RC4_KEY = type { i8, i8, [256 x i8] } - -define void @foo(%struct.RC4_KEY* nocapture %key, i64 %len, i8* %indata, i8* %outdata) nounwind { -entry: - br label %bb24 - -bb24: ; preds = %bb24, %entry - %0 = load i8* null, align 1 ; <i8> [#uses=1] - %1 = zext i8 %0 to i64 ; <i64> [#uses=1] - %2 = shl i64 %1, 32 ; <i64> [#uses=1] - %3 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1] - %4 = load i8* %3, align 1 ; <i8> [#uses=2] - %5 = add i8 %4, 0 ; <i8> [#uses=2] - %6 = zext i8 %5 to i64 ; <i64> [#uses=0] - %7 = load i8* null, align 1 ; <i8> [#uses=1] - %8 = zext i8 %4 to i32 ; <i32> [#uses=1] - %9 = zext i8 %7 to i32 ; <i32> [#uses=1] - %10 = add i32 %9, %8 ; <i32> [#uses=1] - %11 = and i32 %10, 255 ; <i32> [#uses=1] - %12 = zext i32 %11 to i64 ; <i64> [#uses=1] - %13 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %12 ; <i8*> [#uses=1] - %14 = load i8* %13, align 1 ; <i8> [#uses=1] - %15 = zext i8 %14 to i64 ; <i64> [#uses=1] - %16 = shl i64 %15, 48 ; <i64> [#uses=1] - %17 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1] - %18 = load i8* %17, align 1 ; <i8> [#uses=2] - %19 = add i8 %18, %5 ; <i8> [#uses=1] - %20 = zext i8 %19 to i64 ; <i64> [#uses=1] - %21 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %20 ; <i8*> [#uses=1] - store i8 %18, i8* %21, align 1 - %22 = or i64 0, %2 ; <i64> [#uses=1] - %23 = or i64 %22, 0 ; <i64> [#uses=1] - %24 = or i64 %23, %16 ; <i64> [#uses=1] - %25 = or i64 %24, 0 ; <i64> [#uses=1] - %26 = xor i64 %25, 0 ; <i64> [#uses=1] - store i64 %26, i64* null, align 8 - br label %bb24 -} diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll new file mode 100644 index 000000000000..8af3bd1bc229 --- /dev/null +++ b/test/CodeGen/X86/br-fold.ll @@ -0,0 +1,20 
@@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; CHECK: orq +; CHECK-NEXT: jne + +@_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1] +@_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1] + +define fastcc void @foo() { +entry: + br i1 icmp eq (i64 or (i64 ptrtoint ([33 x i16]* @_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE to i64), + i64 ptrtoint ([9 x i16]* @_ZN11xercesc_2_56XMLUni16fgNotationStringE to i64)), i64 0), + label %bb8.i329, label %bb4.i.i318.preheader + +bb4.i.i318.preheader: ; preds = %bb6 + unreachable + +bb8.i329: ; preds = %bb6 + unreachable +} diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll new file mode 100644 index 000000000000..130483ad8410 --- /dev/null +++ b/test/CodeGen/X86/brcond.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s +; rdar://7475489 + +define i32 @test1(i32 %a, i32 %b) nounwind ssp { +entry: +; CHECK: test1: +; CHECK: xorb +; CHECK-NOT: andb +; CHECK-NOT: shrb +; CHECK: testb $64 + %0 = and i32 %a, 16384 + %1 = icmp ne i32 %0, 0 + %2 = and i32 %b, 16384 + %3 = icmp ne i32 %2, 0 + %4 = xor i1 %1, %3 + br i1 %4, label %bb1, label %bb + +bb: ; preds = %entry + %5 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=1] + ret i32 %5 + +bb1: ; preds = %entry + %6 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1] + ret i32 %6 +} + +declare i32 @foo(...) + +declare i32 @bar(...) 
+ + + +; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0 +define i32 @test2(i32* %P, i32* %Q) nounwind ssp { +entry: + %a = icmp eq i32* %P, null ; <i1> [#uses=1] + %b = icmp eq i32* %Q, null ; <i1> [#uses=1] + %c = and i1 %a, %b + br i1 %c, label %bb1, label %return + +bb1: ; preds = %entry + ret i32 4 + +return: ; preds = %entry + ret i32 192 +; CHECK: test2: +; CHECK: movl 4(%esp), %eax +; CHECK-NEXT: orl 8(%esp), %eax +; CHECK-NEXT: jne LBB2_2 +} + +; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0 +define i32 @test3(i32* %P, i32* %Q) nounwind ssp { +entry: + %a = icmp ne i32* %P, null ; <i1> [#uses=1] + %b = icmp ne i32* %Q, null ; <i1> [#uses=1] + %c = or i1 %a, %b + br i1 %c, label %bb1, label %return + +bb1: ; preds = %entry + ret i32 4 + +return: ; preds = %entry + ret i32 192 +; CHECK: test3: +; CHECK: movl 4(%esp), %eax +; CHECK-NEXT: orl 8(%esp), %eax +; CHECK-NEXT: je LBB3_2 +} diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll index a3c1e6f0c554..a9573cfc6a2a 100644 --- a/test/CodeGen/X86/darwin-bzero.ll +++ b/test/CodeGen/X86/darwin-bzero.ll @@ -3,6 +3,6 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32) define void @foo(i8* %p, i32 %len) { - call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1); + call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1) ret void } diff --git a/test/CodeGen/X86/extractelement-shuffle.ll b/test/CodeGen/X86/extractelement-shuffle.ll index 12a2ef30e17e..d1ba9a845800 100644 --- a/test/CodeGen/X86/extractelement-shuffle.ll +++ b/test/CodeGen/X86/extractelement-shuffle.ll @@ -6,8 +6,8 @@ ; through the 3rd mask element, which doesn't exist. define i32 @update(<2 x i64> %val1, <2 x i64> %val2) nounwind readnone { entry: - %shuf = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> <i32 0, i32 3>; - %bit = bitcast <2 x i64> %shuf to <4 x i32>; - %res = extractelement <4 x i32> %bit, i32 3; - ret i32 %res; -}
\ No newline at end of file + %shuf = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> <i32 0, i32 3> + %bit = bitcast <2 x i64> %shuf to <4 x i32> + %res = extractelement <4 x i32> %bit, i32 3 + ret i32 %res +} diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll index 3dcd736a1404..84b3fd7caf3a 100644 --- a/test/CodeGen/X86/fast-isel.ll +++ b/test/CodeGen/X86/fast-isel.ll @@ -14,7 +14,7 @@ fast: %t1 = mul i32 %t0, %s %t2 = sub i32 %t1, %s %t3 = and i32 %t2, %s - %t4 = or i32 %t3, %s + %t4 = xor i32 %t3, 3 %t5 = xor i32 %t4, %s %t6 = add i32 %t5, 2 %t7 = getelementptr i32* %y, i32 1 diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index eb182da10129..5525af25270f 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -1,11 +1,12 @@ -; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86 | FileCheck %s %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 } @stmt_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=1] -define void @expand_start_bindings() { +; This should just not crash. 
+define void @test1() nounwind { entry: - br i1 false, label %cond_true, label %cond_next + br i1 true, label %cond_true, label %cond_next cond_true: ; preds = %entry %new_size.0.i = select i1 false, i32 0, i32 0 ; <i32> [#uses=1] @@ -25,3 +26,22 @@ cond_false30.i: ; preds = %cond_true cond_next: ; preds = %entry ret void } + + + +define i32 @test2(i16* %P, i16* %Q) nounwind { + %A = load i16* %P, align 4 ; <i16> [#uses=11] + %C = zext i16 %A to i32 ; <i32> [#uses=1] + %D = and i32 %C, 255 ; <i32> [#uses=1] + br label %L +L: + + store i16 %A, i16* %Q + ret i32 %D + +; CHECK: test2: +; CHECK: movl 4(%esp), %eax +; CHECK-NEXT: movzwl (%eax), %ecx + +} + diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll index 40589892bb6f..1f3b59a905b9 100644 --- a/test/CodeGen/X86/lsr-sort.ll +++ b/test/CodeGen/X86/lsr-sort.ll @@ -4,7 +4,7 @@ @X = common global i16 0 ; <i16*> [#uses=1] -define void @foo(i32 %N) nounwind { +define i32 @foo(i32 %N) nounwind { entry: %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] br i1 %0, label %bb, label %return @@ -18,5 +18,6 @@ bb: ; preds = %bb, %entry br i1 %exitcond, label %return, label %bb return: ; preds = %bb, %entry - ret void + %h = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] + ret i32 %h } diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll index eca9e6f436c2..069737d4d10d 100644 --- a/test/CodeGen/X86/mul-legalize.ll +++ b/test/CodeGen/X86/mul-legalize.ll @@ -19,6 +19,6 @@ return: ret void } -declare i1 @report__equal(i32 %x, i32 %y) nounwind; +declare i1 @report__equal(i32 %x, i32 %y) nounwind declare void @abort() diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll index 22b6f35a70ef..f52f8c7af8c1 100644 --- a/test/CodeGen/X86/private.ll +++ b/test/CodeGen/X86/private.ll @@ -11,7 +11,7 @@ define private void @foo() { ret void } -@baz = private global i32 4; +@baz = private global i32 4 define i32 @bar() { call void @foo() diff --git a/test/CodeGen/X86/remat-mov-0.ll 
b/test/CodeGen/X86/remat-mov-0.ll new file mode 100644 index 000000000000..c4f768ca529b --- /dev/null +++ b/test/CodeGen/X86/remat-mov-0.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86-64 | grep {xorl %edi, %edi} | count 4 + +; CodeGen should remat the zero instead of spilling it. + +declare void @foo(i64 %p) + +define void @bar() nounwind { + call void @foo(i64 0) + call void @foo(i64 0) + call void @foo(i64 0) + call void @foo(i64 0) + ret void +} diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll new file mode 100644 index 000000000000..b2b9f8121fd6 --- /dev/null +++ b/test/CodeGen/X86/sext-subreg.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; rdar://7529457 + +define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { +; CHECK: t: +; CHECK: movslq %e{{.*}}, %rax +; CHECK: movq %rax +; CHECK: movl %eax + %C = add i64 %A, %B + %D = trunc i64 %C to i32 + volatile store i32 %D, i32* %P + %E = shl i64 %C, 32 + %F = ashr i64 %E, 32 + volatile store i64 %F, i64 *%P2 + volatile store i32 %D, i32* %P + ret i64 undef +} diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll index d7623920ff28..7d85818d46b9 100644 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ b/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6 +; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 14 type { [62 x %struct.Bitvec*] } ; type %0 type { i8* } ; type %1 diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll index 7aae9eb1ab96..f4847a31c81f 100644 --- a/test/CodeGen/X86/stride-nine-with-base-reg.ll +++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll @@ -7,6 +7,7 @@ @B = external global [1000 
x i8], align 32 @A = external global [1000 x i8], align 32 @P = external global [1000 x i8], align 32 +@Q = external global [1000 x i8], align 32 define void @foo(i32 %m, i32 %p) nounwind { entry: @@ -24,6 +25,8 @@ bb: %tmp0 = add i32 %tmp8, %p %tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp0 store i8 17, i8* %tmp10, align 4 + %tmp11 = getelementptr [1000 x i8]* @Q, i32 0, i32 %tmp0 + store i8 19, i8* %tmp11, align 4 %indvar.next = add i32 %i.019.0, 1 %exitcond = icmp eq i32 %indvar.next, %m br i1 %exitcond, label %return, label %bb diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll index 8c3cae9e8d4c..c5dbb04a051b 100644 --- a/test/CodeGen/X86/tail-opts.ll +++ b/test/CodeGen/X86/tail-opts.ll @@ -55,7 +55,7 @@ altret: ret void } -declare i8* @choose(i8*, i8*); +declare i8* @choose(i8*, i8*) ; BranchFolding should tail-duplicate the indirect jump to avoid ; redundant branching. diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll new file mode 100644 index 000000000000..8ddc4054ca88 --- /dev/null +++ b/test/CodeGen/X86/tailcall-largecode.ll @@ -0,0 +1,71 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s + +declare fastcc i32 @callee(i32 %arg) +define fastcc i32 @directcall(i32 %arg) { +entry: +; This is the large code model, so &callee may not fit into the jmp +; instruction. Instead, stick it into a register. +; CHECK: movabsq $callee, [[REGISTER:%r[a-z0-9]+]] +; CHECK: jmpq *[[REGISTER]] # TAILCALL + %res = tail call fastcc i32 @callee(i32 %arg) + ret i32 %res +} + +; Check that the register used for an indirect tail call doesn't +; clobber any of the arguments. +define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target) { +; Adjust the stack to enter the function. (The amount of the +; adjustment may change in the future, in which case the location of +; the stack argument and the return adjustment will change too.) 
+; CHECK: subq $8, %rsp +; Put the call target into R11, which won't be clobbered while restoring +; callee-saved registers and won't be used for passing arguments. +; CHECK: movq %rdi, %r11 +; Pass the stack argument. +; CHECK: movl $7, 16(%rsp) +; Pass the register arguments, in the right registers. +; CHECK: movl $1, %edi +; CHECK: movl $2, %esi +; CHECK: movl $3, %edx +; CHECK: movl $4, %ecx +; CHECK: movl $5, %r8d +; CHECK: movl $6, %r9d +; Adjust the stack to "return". +; CHECK: addq $8, %rsp +; And tail-call to the target. +; CHECK: jmpq *%r11 # TAILCALL + %res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7) + ret i32 %res +} + +; Check that the register used for a direct tail call doesn't clobber +; any of the arguments. +declare fastcc i32 @manyargs_callee(i32,i32,i32,i32,i32,i32,i32) +define fastcc i32 @direct_manyargs() { +; Adjust the stack to enter the function. (The amount of the +; adjustment may change in the future, in which case the location of +; the stack argument and the return adjustment will change too.) +; CHECK: subq $8, %rsp +; Pass the stack argument. +; CHECK: movl $7, 16(%rsp) +; Pass the register arguments, in the right registers. +; CHECK: movl $1, %edi +; CHECK: movl $2, %esi +; CHECK: movl $3, %edx +; CHECK: movl $4, %ecx +; CHECK: movl $5, %r8d +; CHECK: movl $6, %r9d +; This is the large code model, so &manyargs_callee may not fit into +; the jmp instruction. Put it into R11, which won't be clobbered +; while restoring callee-saved registers and won't be used for passing +; arguments. +; CHECK: movabsq $manyargs_callee, %r11 +; Adjust the stack to "return". +; CHECK: addq $8, %rsp +; And tail-call to the target. 
+; CHECK: jmpq *%r11 # TAILCALL + %res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4, + i32 5, i32 6, i32 7) + ret i32 %res +} diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll index 772ff6c3e766..f1063dcabf4f 100644 --- a/test/CodeGen/X86/test-nofold.ll +++ b/test/CodeGen/X86/test-nofold.ll @@ -1,22 +1,35 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | grep {testl.*%e.x.*%e.x} +; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s ; rdar://5752025 -; We don't want to fold the and into the test, because the and clobbers its -; input forcing a copy. We want: -; movl $15, %ecx +; We want: +; CHECK: movl 4(%esp), %ecx +; CHECK-NEXT: andl $15, %ecx +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: ret +; +; We don't want: +; movl 4(%esp), %eax +; movl %eax, %ecx # bad: extra copy +; andl $15, %ecx +; testl $15, %eax # bad: peep obstructed +; movl $42, %eax +; cmovel %ecx, %eax +; ret +; +; We also don't want: +; movl $15, %ecx # bad: larger encoding ; andl 4(%esp), %ecx -; testl %ecx, %ecx ; movl $42, %eax -; cmove %ecx, %eax +; cmovel %ecx, %eax ; ret ; -; Not: -; movl 4(%esp), %eax -; movl %eax, %ecx +; We also don't want: +; movl 4(%esp), %ecx ; andl $15, %ecx -; testl $15, %eax +; testl %ecx, %ecx # bad: unnecessary test ; movl $42, %eax -; cmove %ecx, %eax +; cmovel %ecx, %eax ; ret define i32 @t1(i32 %X) nounwind { diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll new file mode 100644 index 000000000000..a245ed7caa84 --- /dev/null +++ b/test/CodeGen/X86/twoaddr-lea.ll @@ -0,0 +1,24 @@ +;; X's live range extends beyond the shift, so the register allocator +;; cannot coalesce it with Y. Because of this, a copy needs to be +;; emitted before the shift to save the register value before it is +;; clobbered. However, this copy is not needed if the register +;; allocator turns the shift into an LEA. This also occurs for ADD. 
+ +; Check that the shift gets turned into an LEA. +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: not grep {mov E.X, E.X} + +@G = external global i32 ; <i32*> [#uses=3] + +define i32 @test1(i32 %X, i32 %Y) { + %Z = add i32 %X, %Y ; <i32> [#uses=1] + volatile store i32 %Y, i32* @G + volatile store i32 %Z, i32* @G + ret i32 %X +} + +define i32 @test2(i32 %X) { + %Z = add i32 %X, 1 ; <i32> [#uses=1] + volatile store i32 %Z, i32* @G + ret i32 %X +} diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll new file mode 100644 index 000000000000..2dd2a4adac55 --- /dev/null +++ b/test/CodeGen/X86/use-add-flags.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -march=x86-64 -o - | FileCheck %s + +; Reuse the flags value from the add instructions instead of emitting separate +; testl instructions. + +; Use the flags on the add. + +; CHECK: add_zf: +; CHECK: addl (%rdi), %esi +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: cmovnsl %ecx, %eax +; CHECK-NEXT: ret + +define i32 @add_zf(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { + %tmp2 = load i32* %x, align 4 ; <i32> [#uses=1] + %tmp4 = add i32 %tmp2, %y ; <i32> [#uses=1] + %tmp5 = icmp slt i32 %tmp4, 0 ; <i1> [#uses=1] + %tmp.0 = select i1 %tmp5, i32 %a, i32 %b ; <i32> [#uses=1] + ret i32 %tmp.0 +} + +declare void @foo(i32) + +; Don't use the flags result of the and here, since the and has no +; other use. A simple test is better. + +; CHECK: bar: +; CHECK: testb $16, %dil + +define void @bar(i32 %x) nounwind { + %y = and i32 %x, 16 + %t = icmp eq i32 %y, 0 + br i1 %t, label %true, label %false +true: + call void @foo(i32 %x) + ret void +false: + ret void +} + +; Do use the flags result of the and here, since the and has another use. 
+ +; CHECK: qux: +; CHECK: andl $16, %edi +; CHECK-NEXT: jne + +define void @qux(i32 %x) nounwind { + %y = and i32 %x, 16 + %t = icmp eq i32 %y, 0 + br i1 %t, label %true, label %false +true: + call void @foo(i32 %y) + ret void +false: + ret void +} diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll new file mode 100644 index 000000000000..1f899b3c20aa --- /dev/null +++ b/test/CodeGen/X86/vec_cast.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -march=x86-64 +; RUN: llc < %s -march=x86-64 -disable-mmx + +define <8 x i32> @a(<8 x i16> %a) nounwind { + %c = sext <8 x i16> %a to <8 x i32> + ret <8 x i32> %c +} + +define <3 x i32> @b(<3 x i16> %a) nounwind { + %c = sext <3 x i16> %a to <3 x i32> + ret <3 x i32> %c +} + +define <1 x i32> @c(<1 x i16> %a) nounwind { + %c = sext <1 x i16> %a to <1 x i32> + ret <1 x i32> %c +} + +define <8 x i32> @d(<8 x i16> %a) nounwind { + %c = zext <8 x i16> %a to <8 x i32> + ret <8 x i32> %c +} + +define <3 x i32> @e(<3 x i16> %a) nounwind { + %c = zext <3 x i16> %a to <3 x i32> + ret <3 x i32> %c +} + +define <1 x i32> @f(<1 x i16> %a) nounwind { + %c = zext <1 x i16> %a to <1 x i32> + ret <1 x i32> %c +} + +; TODO: Legalize doesn't yet handle this. 
+;define <8 x i16> @g(<8 x i32> %a) nounwind { +; %c = trunc <8 x i32> %a to <8 x i16> +; ret <8 x i16> %c +;} + +define <3 x i16> @h(<3 x i32> %a) nounwind { + %c = trunc <3 x i32> %a to <3 x i16> + ret <3 x i16> %c +} + +define <1 x i16> @i(<1 x i32> %a) nounwind { + %c = trunc <1 x i32> %a to <1 x i16> + ret <1 x i16> %c +} diff --git a/test/CodeGen/X86/vec_ext_inreg.ll b/test/CodeGen/X86/vec_ext_inreg.ll index 02b16a79f4a0..8d2a3c31aedf 100644 --- a/test/CodeGen/X86/vec_ext_inreg.ll +++ b/test/CodeGen/X86/vec_ext_inreg.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=x86-64 +; RUN: llc < %s -march=x86-64 -disable-mmx define <8 x i32> @a(<8 x i32> %a) nounwind { %b = trunc <8 x i32> %a to <8 x i16> diff --git a/test/CodeGen/X86/vec_shuffle-22.ll b/test/CodeGen/X86/vec_shuffle-22.ll index 1cf37d4b9ba9..6807e4d63909 100644 --- a/test/CodeGen/X86/vec_shuffle-22.ll +++ b/test/CodeGen/X86/vec_shuffle-22.ll @@ -9,7 +9,7 @@ define <4 x float> @t1(<4 x float> %a) nounwind { define <4 x i32> @t2(<4 x i32>* %a) nounwind { ; CHECK: pshufd ; CHECK: ret - %tmp1 = load <4 x i32>* %a; + %tmp1 = load <4 x i32>* %a %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x i32>> [#uses=1] ret <4 x i32> %tmp2 } diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll index 2aa2d252849c..d9b2388809aa 100644 --- a/test/CodeGen/X86/vec_shuffle-25.ll +++ b/test/CodeGen/X86/vec_shuffle-25.ll @@ -19,16 +19,16 @@ entry: %unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2] %unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2] %unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] - %unpcklps14a = shufflevector <4 x float> %unpcklps14, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, 
i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>; + %unpcklps14a = shufflevector <4 x float> %unpcklps14, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> %unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] - %unpckhps17a = shufflevector <4 x float> %unpckhps17, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>; - %r1 = shufflevector <16 x float> %unpcklps14a, <16 x float> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>; + %unpckhps17a = shufflevector <4 x float> %unpckhps17, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %r1 = shufflevector <16 x float> %unpcklps14a, <16 x float> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] - %unpcklps20a = shufflevector <4 x float> %unpcklps20, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>; - %r2 = shufflevector <16 x float> %r1, <16 x float> %unpcklps20a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 
18, i32 19, i32 12, i32 13, i32 14, i32 15>; + %unpcklps20a = shufflevector <4 x float> %unpcklps20, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %r2 = shufflevector <16 x float> %r1, <16 x float> %unpcklps20a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15> %unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] - %unpckhps23a = shufflevector <4 x float> %unpckhps23, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>; - %r3 = shufflevector <16 x float> %r2, <16 x float> %unpckhps23a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>; - %r4 = shufflevector <16 x float> %r3, <16 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>; - ret <8 x float> %r4; + %unpckhps23a = shufflevector <4 x float> %unpckhps23, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %r3 = shufflevector <16 x float> %r2, <16 x float> %unpckhps23a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> + %r4 = shufflevector <16 x float> %r3, <16 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x float> %r4 } diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll index 8cc15d1e7c27..086af6bb114b 100644 --- a/test/CodeGen/X86/vec_shuffle-26.ll 
+++ b/test/CodeGen/X86/vec_shuffle-26.ll @@ -20,10 +20,10 @@ entry: %unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2] %unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] %unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] - %r1 = shufflevector <4 x float> %unpcklps14, <4 x float> %unpckhps17, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >; + %r1 = shufflevector <4 x float> %unpcklps14, <4 x float> %unpckhps17, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > %unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] %unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] - %r2 = shufflevector <4 x float> %unpcklps20, <4 x float> %unpckhps23, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >; + %r2 = shufflevector <4 x float> %unpcklps20, <4 x float> %unpckhps23, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > ; %r3 = shufflevector <8 x float> %r1, <8 x float> %r2, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >; - ret <8 x float> %r2; + ret <8 x float> %r2 } diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll index 4154433fa704..d9de892933e0 100644 --- a/test/CodeGen/X86/widen_select-1.ll +++ b/test/CodeGen/X86/widen_select-1.ll @@ -6,7 +6,7 @@ define void @select(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind { entry: %x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2 - %val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 
1, i32 1, i32 1 >; + %val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > store <6 x i32> %val, <6 x i32>* %dst.addr ret void } diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index dd02241c1dd6..47dba4b4a04b 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -7,7 +7,7 @@ define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2> - %val = fadd <3 x float> %x, %src2; + %val = fadd <3 x float> %x, %src2 store <3 x float> %val, <3 x float>* %dst.addr ret void } diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll index d097e4142bcc..9374a028631d 100644 --- a/test/CodeGen/X86/widen_shuffle-2.ll +++ b/test/CodeGen/X86/widen_shuffle-2.ll @@ -7,7 +7,7 @@ define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2> - %val = fadd <3 x float> %x, %src2; + %val = fadd <3 x float> %x, %src2 store <3 x float> %val, <3 x float>* %dst.addr ret void } diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll index 3c7389111267..2465f23a7689 100644 --- a/test/CodeGen/X86/x86-64-and-mask.ll +++ b/test/CodeGen/X86/x86-64-and-mask.ll @@ -1,12 +1,49 @@ -; RUN: llc < %s | grep {movl.*%edi, %eax} -; This should be a single mov, not a load of immediate + andq. +; RUN: llc < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin8" -define i64 @test(i64 %x) nounwind { +; This should be a single mov, not a load of immediate + andq. 
+; CHECK: test: +; CHECK: movl %edi, %eax + +define i64 @test(i64 %x) nounwind { entry: %tmp123 = and i64 %x, 4294967295 ; <i64> [#uses=1] ret i64 %tmp123 } +; This copy can't be coalesced away because it needs the implicit zero-extend. +; CHECK: bbb: +; CHECK: movl %edi, %edi + +define void @bbb(i64 %x) nounwind { + %t = and i64 %x, 4294967295 + call void @foo(i64 %t) + ret void +} + +; This should use a 32-bit and with implicit zero-extension, not a 64-bit and +; with a separate mov to materialize the mask. +; rdar://7527390 +; CHECK: ccc: +; CHECK: andl $-1048593, %edi + +declare void @foo(i64 %x) nounwind + +define void @ccc(i64 %x) nounwind { + %t = and i64 %x, 4293918703 + call void @foo(i64 %t) + ret void +} + +; This requires a mov and a 64-bit and. +; CHECK: ddd: +; CHECK: movabsq $4294967296, %rax +; CHECK: andq %rax, %rdi + +define void @ddd(i64 %x) nounwind { + %t = and i64 %x, 4294967296 + call void @foo(i64 %t) + ret void +} diff --git a/test/CodeGen/X86/x86-64-jumps.ll b/test/CodeGen/X86/x86-64-jumps.ll index 5ed6a23ef876..11b40c897618 100644 --- a/test/CodeGen/X86/x86-64-jumps.ll +++ b/test/CodeGen/X86/x86-64-jumps.ll @@ -14,3 +14,32 @@ bb6: ; preds = %entry ret i8 2 } + +; PR5930 - Trunc of block address differences. 
+@test.array = internal constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %bar) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %hack) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32)] ; <[3 x i32]*> [#uses=1] + +define void @test2(i32 %i) nounwind ssp { +entry: + %i.addr = alloca i32 ; <i32*> [#uses=2] + store i32 %i, i32* %i.addr + %tmp = load i32* %i.addr ; <i32> [#uses=1] + %idxprom = sext i32 %tmp to i64 ; <i64> [#uses=1] + %arrayidx = getelementptr inbounds i32* getelementptr inbounds ([3 x i32]* @test.array, i32 0, i32 0), i64 %idxprom ; <i32*> [#uses=1] + %tmp1 = load i32* %arrayidx ; <i32> [#uses=1] + %idx.ext = sext i32 %tmp1 to i64 ; <i64> [#uses=1] + %add.ptr = getelementptr i8* blockaddress(@test2, %foo), i64 %idx.ext ; <i8*> [#uses=1] + br label %indirectgoto + +foo: ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto + br label %bar + +bar: ; preds = %foo, %indirectgoto + br label %hack + +hack: ; preds = %bar, %indirectgoto + ret void + +indirectgoto: ; preds = %entry + %indirect.goto.dest = phi i8* [ %add.ptr, %entry ] ; <i8*> [#uses=1] + indirectbr i8* %indirect.goto.dest, [label %foo, label %foo, label %bar, label %foo, label %hack, label %foo, label %foo] +} diff --git a/test/CodeGen/X86/brcond-srl.ll b/test/CodeGen/X86/xor-icmp.ll index 12674e91a0bd..a6bdb13ec6b4 100644 --- a/test/CodeGen/X86/brcond-srl.ll +++ b/test/CodeGen/X86/xor-icmp.ll @@ -1,13 +1,20 @@ -; RUN: llc < %s -march=x86 | FileCheck %s -; rdar://7475489 +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 define i32 @t(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: t: -; CHECK: xorb -; CHECK-NOT: andb -; 
CHECK-NOT: shrb -; CHECK: testb $64 +; X32: t: +; X32: xorb +; X32-NOT: andb +; X32-NOT: shrb +; X32: testb $64 +; X32: jne + +; X64: t: +; X64-NOT: setne +; X64: xorl +; X64: testb $64 +; X64: jne %0 = and i32 %a, 16384 %1 = icmp ne i32 %0, 0 %2 = and i32 %b, 16384 |
