Diffstat (limited to 'test/CodeGen/CellSPU')
45 files changed, 6052 insertions, 0 deletions
diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll new file mode 100644 index 000000000000..75e0ed0cd2fc --- /dev/null +++ b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll @@ -0,0 +1,31 @@ +; RUN: llvm-as < %s | llc -march=cellspu -o - | grep brz +; PR3274 + +target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128" +target triple = "spu" + %struct.anon = type { i64 } + %struct.fp_number_type = type { i32, i32, i32, [4 x i8], %struct.anon } + +define double @__floatunsidf(i32 %arg_a) nounwind { +entry: + %in = alloca %struct.fp_number_type, align 16 + %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1 + store i32 0, i32* %0, align 4 + %1 = icmp eq i32 %arg_a, 0 + %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0 + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + store i32 2, i32* %2, align 8 + br label %bb7 + +bb1: ; preds = %entry + ret double 0.0 + +bb7: ; preds = %bb5, %bb1, %bb + ret double 1.0 +} + +; declare i32 @llvm.ctlz.i32(i32) nounwind readnone + +declare double @__pack_d(%struct.fp_number_type*) diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll new file mode 100644 index 000000000000..a18b6f8d05fc --- /dev/null +++ b/test/CodeGen/CellSPU/and_ops.ll @@ -0,0 +1,279 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep and %t1.s | count 234 +; RUN: grep andc %t1.s | count 85 +; RUN: grep andi %t1.s | count 37 +; RUN: grep andhi %t1.s | count 30 +; RUN: grep andbi %t1.s | count 4 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; AND instruction generation: +define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg1, %arg2 + ret <4 x i32> %A +} + +define <4 x i32> @and_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg2, %arg1 + ret <4 x i32> %A +} + +define <8 x i16> @and_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg1, %arg2 + ret <8 x i16> %A +} + +define <8 x i16> @and_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg2, %arg1 + ret <8 x i16> %A +} + +define <16 x i8> @and_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg2, %arg1 + ret <16 x i8> %A +} + +define <16 x i8> @and_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg1, %arg2 + ret <16 x i8> %A +} + +define i32 @and_i32_1(i32 %arg1, i32 %arg2) { + %A = and i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @and_i32_2(i32 %arg1, i32 %arg2) { + %A = and i32 %arg1, %arg2 + ret i32 %A +} + +define i16 @and_i16_1(i16 %arg1, i16 %arg2) { + %A = and i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @and_i16_2(i16 %arg1, i16 %arg2) { + %A = and i16 %arg1, %arg2 + ret i16 %A +} + +define i8 @and_i8_1(i8 %arg1, i8 %arg2) { + %A = and i8 %arg2, %arg1 + ret i8 %A +} + +define i8 @and_i8_2(i8 %arg1, i8 %arg2) { + %A = and i8 %arg1, %arg2 + ret i8 %A +} + +; ANDC instruction generation: +define <4 x i32> @andc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %arg1, %A + ret <4 x i32> %B +} + +define <4 x i32> @andc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %arg2, %A + ret <4 x i32> %B +} + +define <4 x i32> 
@andc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %arg2 + ret <4 x i32> %B +} + +define <8 x i16> @andc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %arg1, %A + ret <8 x i16> %B +} + +define <8 x i16> @andc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %arg2, %A + ret <8 x i16> %B +} + +define <16 x i8> @andc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg2, %A + ret <16 x i8> %B +} + +define <16 x i8> @andc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg1, %A + ret <16 x i8> %B +} + +define <16 x i8> @andc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg1 + ret <16 x i8> %B +} + +define i32 @andc_i32_1(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg2, -1 + %B = and i32 %A, %arg1 + ret i32 %B +} + +define i32 @andc_i32_2(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg1, -1 + %B = and i32 %A, %arg2 + ret i32 %B +} + +define i32 @andc_i32_3(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg2, -1 + %B = and i32 %arg1, %A + ret i32 %B +} + +define i16 @andc_i16_1(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B = and i16 %A, %arg1 + ret i16 %B +} + +define i16 @andc_i16_2(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg1, -1 + %B = and i16 %A, %arg2 + ret i16 %B +} + +define i16 @andc_i16_3(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B = and i16 %arg1, %A + ret i16 %B +} + +define i8 @andc_i8_1(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = and i8 %A, %arg1 + ret i8 %B +} + +define i8 @andc_i8_2(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg1, -1 + %B = and i8 %A, %arg2 + ret i8 %B +} + +define i8 @andc_i8_3(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = and i8 %arg1, %A + ret i8 %B +} + +; ANDI instruction generation (i32 data type): +define <4 x i32> @andi_v4i32_1(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @andi_v4i32_2(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @andi_v4i32_3(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @andi_v4i32_4(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 > + ret <4 x i32> %tmp2 +} + +define i32 @andi_u32(i32 zeroext %in) zeroext { + %tmp37 = and i32 %in, 37 + ret i32 %tmp37 +} + +define i32 @andi_i32(i32 signext %in) signext { + %tmp38 = and i32 %in, 37 + ret i32 %tmp38 +} + +define i32 @andi_i32_1(i32 %in) { + %tmp37 = and i32 %in, 37 + ret i32 %tmp37 +} + +; ANDHI instruction generation (i16 data type): +define <8 x i16> @andhi_v8i16_1(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511, + i16 511, i16 511, i16 511, i16 511 > + 
ret <8 x i16> %tmp2 +} + +define <8 x i16> @andhi_v8i16_2(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510, + i16 510, i16 510, i16 510, i16 510 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @andhi_v8i16_3(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @andhi_v8i16_4(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512, + i16 -512, i16 -512, i16 -512, i16 -512 > + ret <8 x i16> %tmp2 +} + +define i16 @andhi_u16(i16 zeroext %in) zeroext { + %tmp37 = and i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp37 +} + +define i16 @andhi_i16(i16 signext %in) signext { + %tmp38 = and i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp38 +} + +; i8 data type (s/b ANDBI if 8-bit registers were supported): +define <16 x i8> @and_v16i8(<16 x i8> %in) { + ; ANDBI generated for vector types + %tmp2 = and <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42 > + ret <16 x i8> %tmp2 +} + +define i8 @and_u8(i8 zeroext %in) zeroext { + ; ANDBI generated: + %tmp37 = and i8 %in, 37 + ret i8 %tmp37 +} + +define i8 @and_sext8(i8 signext %in) signext { + ; ANDBI generated + %tmp38 = and i8 %in, 37 + ret i8 %tmp38 +} + +define i8 @and_i8(i8 %in) { + ; ANDBI generated + %tmp38 = and i8 %in, 205 + ret i8 %tmp38 +} diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll new file mode 100644 index 000000000000..a305a2354041 --- /dev/null +++ b/test/CodeGen/CellSPU/call.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep brsl %t1.s | count 1 +; RUN: grep brasl %t1.s | count 1 +; RUN: grep stqd %t1.s | count 80 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i32 @main() { +entry: + %a = call i32 @stub_1(i32 1, float 0x400921FA00000000) + call void @extern_stub_1(i32 %a, i32 4) + ret i32 %a +} + +declare void @extern_stub_1(i32, i32) + +define i32 @stub_1(i32 %x, float %y) { +entry: + ret i32 0 +} + +; vararg call: ensure that all caller-saved registers are spilled to the +; stack: +define i32 @stub_2(...) 
{ +entry: + ret i32 0 +} diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll new file mode 100644 index 000000000000..9be714ebc9b8 --- /dev/null +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -0,0 +1,49 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s +; RUN: grep bisl %t1.s | count 7 +; RUN: grep ila %t1.s | count 1 +; RUN: grep rotqby %t1.s | count 6 +; RUN: grep lqa %t1.s | count 1 +; RUN: grep lqd %t1.s | count 12 +; RUN: grep dispatch_tab %t1.s | count 5 +; RUN: grep bisl %t2.s | count 7 +; RUN: grep ilhu %t2.s | count 2 +; RUN: grep iohl %t2.s | count 2 +; RUN: grep rotqby %t2.s | count 6 +; RUN: grep lqd %t2.s | count 13 +; RUN: grep ilhu %t2.s | count 2 +; RUN: grep ai %t2.s | count 9 +; RUN: grep dispatch_tab %t2.s | count 6 + +; ModuleID = 'call_indirect.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" +target triple = "spu-unknown-elf" + +@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16 + +define void @dispatcher(i32 %i_arg, float %f_arg) { +entry: + %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16 + tail call void %tmp2( i32 %i_arg, float %f_arg ) + %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4 + tail call void %tmp2.1( i32 %i_arg, float %f_arg ) + %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4 + tail call void %tmp2.2( i32 %i_arg, float %f_arg ) + %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4 + tail call void %tmp2.3( i32 %i_arg, float %f_arg ) + %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4 + tail call void %tmp2.4( i32 %i_arg, float %f_arg ) + %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4 + tail call void %tmp2.5( i32 %i_arg, float %f_arg ) + ret void +} + +@ptr_list = internal global [1 x void ()*] [ void ()* inttoptr (i64 4294967295 to void ()*) ], align 4 +@ptr.a = internal global void ()** getelementptr ([1 x void ()*]* @ptr_list, i32 0, i32 1), align 16 + +define void @double_indirect_call() { + %a = load void ()*** @ptr.a, align 16 + %b = load void ()** %a, align 4 + tail call void %b() + ret void +} diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll new file mode 100644 index 000000000000..3c7ee7aeea2b --- /dev/null +++ b/test/CodeGen/CellSPU/ctpop.ll @@ -0,0 +1,30 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep cntb %t1.s | count 3 +; RUN: grep andi %t1.s | count 3 +; RUN: grep rotmi %t1.s | count 2 +; RUN: grep rothmi %t1.s | count 1 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare i8 @llvm.ctpop.i8(i8) +declare i16 @llvm.ctpop.i16(i16) +declare i32 @llvm.ctpop.i32(i32) + +define i32 @test_i8(i8 %X) { + call i8 @llvm.ctpop.i8(i8 %X) + %Y = zext i8 %1 to i32 + ret i32 %Y +} + +define i32 @test_i16(i16 %X) { + call i16 @llvm.ctpop.i16(i16 %X) + %Y = zext i16 %1 to i32 + ret i32 %Y +} + +define i32 @test_i32(i32 %X) { + call i32 @llvm.ctpop.i32(i32 %X) + %Y = bitcast i32 %1 to i32 
+ ret i32 %Y +} + diff --git a/test/CodeGen/CellSPU/dg.exp b/test/CodeGen/CellSPU/dg.exp new file mode 100644 index 000000000000..d41647991a0a --- /dev/null +++ b/test/CodeGen/CellSPU/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target CellSPU] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +} diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll new file mode 100644 index 000000000000..2579a404eea5 --- /dev/null +++ b/test/CodeGen/CellSPU/dp_farith.ll @@ -0,0 +1,102 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep dfa %t1.s | count 2 +; RUN: grep dfs %t1.s | count 2 +; RUN: grep dfm %t1.s | count 6 +; RUN: grep dfma %t1.s | count 2 +; RUN: grep dfms %t1.s | count 2 +; RUN: grep dfnms %t1.s | count 4 +; +; This file includes double precision floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define double @fadd(double %arg1, double %arg2) { + %A = add double %arg1, %arg2 + ret double %A +} + +define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) { + %A = add <2 x double> %arg1, %arg2 + ret <2 x double> %A +} + +define double @fsub(double %arg1, double %arg2) { + %A = sub double %arg1, %arg2 + ret double %A +} + +define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) { + %A = sub <2 x double> %arg1, %arg2 + ret <2 x double> %A +} + +define double @fmul(double %arg1, double %arg2) { + %A = mul double %arg1, %arg2 + ret double %A +} + +define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) { + %A = mul <2 x double> %arg1, %arg2 + ret <2 x double> %A +} + +define double @fma(double %arg1, double %arg2, double %arg3) { + %A = mul double %arg1, %arg2 + %B = add double %A, %arg3 + ret double %B +} + +define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = mul <2 x double> %arg1, %arg2 + %B = add <2 x double> %A, %arg3 + ret <2 x double> %B +} + +define double @fms(double %arg1, double %arg2, double %arg3) { + %A = mul double %arg1, %arg2 + %B = sub double %A, %arg3 + ret double %B +} + +define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = mul <2 x double> %arg1, %arg2 + %B = sub <2 x double> %A, %arg3 + ret <2 x double> %B +} + +; - (a * b - c) +define double @d_fnms_1(double %arg1, double %arg2, double %arg3) { + %A = mul double %arg1, %arg2 + %B = sub double %A, %arg3 + %C = sub double -0.000000e+00, %B ; <double> [#uses=1] + ret double %C +} + +; Annother way of getting fnms +; - ( a * b ) + c => c - (a * b) +define double @d_fnms_2(double %arg1, double %arg2, double %arg3) { + %A = mul double %arg1, %arg2 + %B = sub double %arg3, %A + ret double %B +} + +; FNMS: - (a * b - c) => c - (a * b) +define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = mul <2 x double> %arg1, %arg2 + %B = sub <2 x double> %arg3, %A ; + ret <2 x double> %B +} + +; Another way to get fnms using a constant vector +; - ( a * b - c) +define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = mul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1] + %B = sub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1] + %C = sub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B + ret <2 x double> %C +} + +;define double @fdiv_1(double %arg1, double 
%arg2) { +; %A = fdiv double %arg1, %arg2 ; <double> [#uses=1] +; ret double %A +;} diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll new file mode 100644 index 000000000000..540695677205 --- /dev/null +++ b/test/CodeGen/CellSPU/eqv.ll @@ -0,0 +1,152 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep eqv %t1.s | count 18 +; RUN: grep xshw %t1.s | count 6 +; RUN: grep xsbh %t1.s | count 3 +; RUN: grep andi %t1.s | count 3 + +; Test the 'eqv' instruction, whose boolean expression is: +; (a & b) | (~a & ~b), which simplifies to +; (a & b) | ~(a | b) +; Alternatively, a ^ ~b, which the compiler will also match. + +; ModuleID = 'eqv.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg1, %arg2 + %B = or <4 x i32> %arg1, %arg2 + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > + %C = or <4 x i32> %A, %Bnot + ret <4 x i32> %C +} + +define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C +} + +define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { + %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C +} + +define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) { + %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %C = xor <4 x i32> %arg1, %arg2not + ret <4 x i32> %C +} + +define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) { + %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] + %B = or i32 %arg1, %arg2 ; <i32> [#uses=1] + %Bnot = xor i32 %B, -1 ; <i32> [#uses=1] + %C = or i32 %A, %Bnot ; <i32> [#uses=1] + ret i32 %C +} + +define i32 @equiv_i32_2(i32 %arg1, i32 %arg2) { + %B = or i32 %arg1, %arg2 ; <i32> [#uses=1] + %Bnot = xor i32 %B, -1 ; <i32> [#uses=1] + %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] + %C = or i32 %A, %Bnot ; <i32> [#uses=1] + ret i32 %C +} + +define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) { + %B = or i32 %arg1, %arg2 ; <i32> [#uses=1] + %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] + %Bnot = xor i32 %B, -1 ; <i32> [#uses=1] + %C = or i32 %A, %Bnot ; <i32> [#uses=1] + ret i32 %C +} + +define i32 @equiv_i32_4(i32 %arg1, i32 %arg2) { + %arg2not = xor i32 %arg2, -1 + %C = xor i32 %arg1, %arg2not + ret i32 %C +} + +define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) { + %arg1not = xor i32 %arg1, -1 + %C = xor i32 %arg2, %arg1not + ret i32 %C +} + +define i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) signext { + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %B = or i16 %arg1, %arg2 ; <i16> [#uses=1] + %Bnot = xor i16 %B, -1 ; <i16> [#uses=1] + %C = or i16 %A, %Bnot ; <i16> [#uses=1] + ret i16 %C +} + +define i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) signext { + %B = or i16 %arg1, %arg2 ; <i16> [#uses=1] + %Bnot = xor i16 %B, -1 ; <i16> [#uses=1] + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %C = or i16 %A, %Bnot ; <i16> [#uses=1] + ret i16 %C +} + +define i16 @equiv_i16_3(i16 signext 
%arg1, i16 signext %arg2) signext { + %B = or i16 %arg1, %arg2 ; <i16> [#uses=1] + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %Bnot = xor i16 %B, -1 ; <i16> [#uses=1] + %C = or i16 %A, %Bnot ; <i16> [#uses=1] + ret i16 %C +} + +define i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) signext { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} + +define i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) signext { + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} + +define i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) signext { + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} + +define i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} + +define i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} + +define i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll new file mode 100644 index 000000000000..bcd2f42aa77e --- /dev/null +++ b/test/CodeGen/CellSPU/extract_elt.ll @@ -0,0 +1,277 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep shufb %t1.s | count 39 +; RUN: grep ilhu %t1.s | count 27 +; RUN: grep iohl %t1.s | count 27 +; RUN: grep lqa %t1.s | count 10 +; RUN: grep shlqby %t1.s | count 12 +; RUN: grep 515 %t1.s | count 1 +; RUN: grep 1029 %t1.s | count 2 +; RUN: grep 1543 %t1.s | count 2 +; RUN: grep 2057 %t1.s | count 2 +; RUN: grep 2571 %t1.s | count 2 +; RUN: grep 3085 %t1.s | count 2 +; RUN: grep 3599 %t1.s | count 2 +; RUN: grep 32768 %t1.s | count 1 +; RUN: grep 32769 %t1.s | count 1 +; RUN: grep 32770 %t1.s | count 1 +; RUN: grep 32771 %t1.s | count 1 +; RUN: grep 32772 %t1.s | count 1 +; RUN: grep 32773 %t1.s | count 1 +; RUN: grep 32774 %t1.s | count 1 +; RUN: grep 32775 %t1.s | count 1 +; RUN: grep 32776 %t1.s | count 1 +; RUN: grep 32777 %t1.s | count 1 +; RUN: grep 32778 %t1.s | count 1 +; RUN: grep 32779 %t1.s | count 1 +; RUN: grep 32780 %t1.s | count 1 +; RUN: grep 32781 %t1.s | count 1 +; RUN: grep 32782 %t1.s | count 1 +; RUN: grep 32783 %t1.s | count 1 +; RUN: grep 32896 %t1.s | count 24 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i32 @i32_extract_0(<4 x i32> %v) { +entry: + %a = extractelement <4 x i32> %v, i32 0 + ret i32 %a +} + +define i32 @i32_extract_1(<4 x i32> %v) { +entry: + %a = extractelement <4 x i32> %v, i32 1 + ret i32 %a +} + +define i32 @i32_extract_2(<4 x i32> %v) { +entry: + %a = extractelement <4 x i32> %v, i32 2 + ret i32 %a +} + +define i32 
@i32_extract_3(<4 x i32> %v) { +entry: + %a = extractelement <4 x i32> %v, i32 3 + ret i32 %a +} + +define i16 @i16_extract_0(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 0 + ret i16 %a +} + +define i16 @i16_extract_1(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 1 + ret i16 %a +} + +define i16 @i16_extract_2(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 2 + ret i16 %a +} + +define i16 @i16_extract_3(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 3 + ret i16 %a +} + +define i16 @i16_extract_4(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 4 + ret i16 %a +} + +define i16 @i16_extract_5(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 5 + ret i16 %a +} + +define i16 @i16_extract_6(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 6 + ret i16 %a +} + +define i16 @i16_extract_7(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 7 + ret i16 %a +} + +define i8 @i8_extract_0(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 0 + ret i8 %a +} + +define i8 @i8_extract_1(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 1 + ret i8 %a +} + +define i8 @i8_extract_2(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 2 + ret i8 %a +} + +define i8 @i8_extract_3(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 3 + ret i8 %a +} + +define i8 @i8_extract_4(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 4 + ret i8 %a +} + +define i8 @i8_extract_5(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 5 + ret i8 %a +} + +define i8 @i8_extract_6(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 6 + ret i8 %a +} + +define i8 @i8_extract_7(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 7 + ret i8 %a +} + +define i8 @i8_extract_8(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 8 + ret i8 %a +} + +define i8 @i8_extract_9(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 9 + ret i8 %a +} + +define i8 @i8_extract_10(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 10 + ret i8 %a +} + +define i8 @i8_extract_11(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 11 + ret i8 %a +} + +define i8 @i8_extract_12(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 12 + ret i8 %a +} + +define i8 @i8_extract_13(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 13 + ret i8 %a +} + +define i8 @i8_extract_14(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 14 + ret i8 %a +} + +define i8 @i8_extract_15(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 15 + ret i8 %a +} + +;;-------------------------------------------------------------------------- +;; extract element, variable index: +;;-------------------------------------------------------------------------- + +define i8 @extract_varadic_i8(i32 %i) nounwind readnone { +entry: + %0 = extractelement <16 x i8> < i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i32 %i + ret i8 %0 +} + +define i8 @extract_varadic_i8_1(<16 x i8> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <16 x i8> %v, i32 %i + ret i8 %0 +} + +define i16 @extract_varadic_i16(i32 %i) nounwind readnone { +entry: + %0 = extractelement <8 x i16> < i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i32 %i + ret i16 %0 +} + +define i16 @extract_varadic_i16_1(<8 x i16> %v, i32 %i) nounwind readnone { 
+entry: + %0 = extractelement <8 x i16> %v, i32 %i + ret i16 %0 +} + +define i32 @extract_varadic_i32(i32 %i) nounwind readnone { +entry: + %0 = extractelement <4 x i32> < i32 0, i32 1, i32 2, i32 3>, i32 %i + ret i32 %0 +} + +define i32 @extract_varadic_i32_1(<4 x i32> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <4 x i32> %v, i32 %i + ret i32 %0 +} + +define float @extract_varadic_f32(i32 %i) nounwind readnone { +entry: + %0 = extractelement <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, i32 %i + ret float %0 +} + +define float @extract_varadic_f32_1(<4 x float> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <4 x float> %v, i32 %i + ret float %0 +} + +define i64 @extract_varadic_i64(i32 %i) nounwind readnone { +entry: + %0 = extractelement <2 x i64> < i64 0, i64 1>, i32 %i + ret i64 %0 +} + +define i64 @extract_varadic_i64_1(<2 x i64> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <2 x i64> %v, i32 %i + ret i64 %0 +} + +define double @extract_varadic_f64(i32 %i) nounwind readnone { +entry: + %0 = extractelement <2 x double> < double 1.000000e+00, double 2.000000e+00>, i32 %i + ret double %0 +} + +define double @extract_varadic_f64_1(<2 x double> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <2 x double> %v, i32 %i + ret double %0 +} diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll new file mode 100644 index 000000000000..27a659e82930 --- /dev/null +++ b/test/CodeGen/CellSPU/fcmp32.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep fceq %t1.s | count 1 +; RUN: grep fcmeq %t1.s | count 1 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; Exercise the floating point comparison operators for f32: + +declare double @fabs(double) +declare float @fabsf(float) + +define i1 @fcmp_eq(float %arg1, float %arg2) { + %A = fcmp oeq float %arg1, %arg2 + ret i1 %A +} + +define i1 @fcmp_mag_eq(float %arg1, float %arg2) { + %1 = call float @fabsf(float %arg1) + %2 = call float @fabsf(float %arg2) + %3 = fcmp oeq float %1, %2 + ret i1 %3 +} diff --git a/test/CodeGen/CellSPU/fcmp64.ll b/test/CodeGen/CellSPU/fcmp64.ll new file mode 100644 index 000000000000..1906bfe7ddaa --- /dev/null +++ b/test/CodeGen/CellSPU/fcmp64.ll @@ -0,0 +1,7 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s + +define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind { +entry: + %A = fcmp oeq double %arg1, %arg2 + ret i1 %A +} diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll new file mode 100644 index 000000000000..d121c3f8c907 --- /dev/null +++ b/test/CodeGen/CellSPU/fdiv.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep frest %t1.s | count 2 +; RUN: grep -w fi %t1.s | count 2 +; RUN: grep -w fm %t1.s | count 2 +; RUN: grep fma %t1.s | count 2 +; RUN: grep fnms %t1.s | count 4 +; RUN: grep cgti %t1.s | count 2 +; RUN: grep selb %t1.s | count 2 +; +; This file includes standard floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define float @fdiv32(float %arg1, float %arg2) { + %A = fdiv float %arg1, %arg2 + ret float %A +} + +define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) { + %A = fdiv <4 x 
float> %arg1, %arg2 + ret <4 x float> %A +} diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll new file mode 100644 index 000000000000..4c6fbb95a39f --- /dev/null +++ b/test/CodeGen/CellSPU/fneg-fabs.ll @@ -0,0 +1,42 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep 32768 %t1.s | count 2 +; RUN: grep xor %t1.s | count 4 +; RUN: grep and %t1.s | count 2 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define double @fneg_dp(double %X) { + %Y = sub double -0.000000e+00, %X + ret double %Y +} + +define <2 x double> @fneg_dp_vec(<2 x double> %X) { + %Y = sub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X + ret <2 x double> %Y +} + +define float @fneg_sp(float %X) { + %Y = sub float -0.000000e+00, %X + ret float %Y +} + +define <4 x float> @fneg_sp_vec(<4 x float> %X) { + %Y = sub <4 x float> <float -0.000000e+00, float -0.000000e+00, + float -0.000000e+00, float -0.000000e+00>, %X + ret <4 x float> %Y +} + +declare double @fabs(double) + +declare float @fabsf(float) + +define double @fabs_dp(double %X) { + %Y = call double @fabs( double %X ) + ret double %Y +} + +define float @fabs_sp(float %X) { + %Y = call float @fabsf( float %X ) + ret float %Y +} diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll new file mode 100644 index 000000000000..dd6782772a5d --- /dev/null +++ b/test/CodeGen/CellSPU/i64ops.ll @@ -0,0 +1,57 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep xswd %t1.s | count 3 +; RUN: grep xsbh %t1.s | count 1 +; RUN: grep xshw %t1.s | count 2 +; RUN: grep shufb %t1.s | count 7 +; RUN: grep cg %t1.s | count 4 +; RUN: grep addx %t1.s | count 4 +; RUN: grep fsmbi %t1.s | count 3 +; RUN: grep il %t1.s | count 2 +; RUN: grep mpy %t1.s | count 10 +; RUN: grep mpyh %t1.s | count 6 +; RUN: grep mpyhhu %t1.s | count 2 +; RUN: grep mpyu %t1.s | count 4 + +; ModuleID = 'stores.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i64 @sext_i64_i8(i8 %a) nounwind { + %1 = sext i8 %a to i64 + ret i64 %1 +} + +define i64 @sext_i64_i16(i16 %a) nounwind { + %1 = sext i16 %a to i64 + ret i64 %1 +} + +define i64 @sext_i64_i32(i32 %a) nounwind { + %1 = sext i32 %a to i64 + ret i64 %1 +} + +define i64 @zext_i64_i8(i8 %a) nounwind { + %1 = zext i8 %a to i64 + ret i64 %1 +} + +define i64 @zext_i64_i16(i16 %a) nounwind { + %1 = zext i16 %a to i64 + ret i64 %1 +} + +define i64 @zext_i64_i32(i32 %a) nounwind { + %1 = zext i32 %a to i64 + ret i64 %1 +} + +define i64 @add_i64(i64 %a, i64 %b) nounwind { + %1 = add i64 %a, %b + ret i64 %1 +} + +define i64 @mul_i64(i64 %a, i64 %b) nounwind { + %1 = mul i64 %a, %b + ret i64 %1 +} diff --git a/test/CodeGen/CellSPU/i8ops.ll b/test/CodeGen/CellSPU/i8ops.ll new file mode 100644 index 000000000000..23a036e37443 --- /dev/null +++ b/test/CodeGen/CellSPU/i8ops.ll @@ -0,0 +1,25 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s + +; ModuleID = 'i8ops.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i8 @add_i8(i8 %a, i8 %b) nounwind { + %1 = add i8 %a, %b + ret i8 %1 +} + +define i8 @add_i8_imm(i8 %a, i8 %b) nounwind { + %1 = add i8 %a, 15 + ret i8 %1 +} + +define i8 @sub_i8(i8 %a, i8 %b) 
nounwind { + %1 = sub i8 %a, %b + ret i8 %1 +} + +define i8 @sub_i8_imm(i8 %a, i8 %b) nounwind { + %1 = sub i8 %a, 15 + ret i8 %1 +} diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll new file mode 100644 index 000000000000..56d1b8fb41b2 --- /dev/null +++ b/test/CodeGen/CellSPU/icmp16.ll @@ -0,0 +1,350 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep ilh %t1.s | count 15 +; RUN: grep ceqh %t1.s | count 29 +; RUN: grep ceqhi %t1.s | count 13 +; RUN: grep clgth %t1.s | count 15 +; RUN: grep cgth %t1.s | count 14 +; RUN: grep cgthi %t1.s | count 6 +; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 17 +; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 6 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 +; $3 = %arg1, $4 = %val1, $5 = %val2 +; +; For "positive" comparisons: +; selb $3, $6, $5, <i1> +; selb $3, $5, $4, <i1> +; +; For "negative" comparisons, i.e., those where the result of the comparison +; must be inverted (setne, for example): +; selb $3, $5, $6, <i1> +; selb $3, $4, $5, <i1> + +; i16 integer comparisons: +define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, -512 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, -1 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, -512 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, -1 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { 
+entry: + %A = icmp ugt i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, 500 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, 0 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, 65024 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp uge i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp uge i16 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp uge i16 %arg1, <immed> can always be transformed into +;; icmp ugt i16 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, 65534 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, 65024 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, 32769 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ule i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_ule_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ule i16 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp ule i16 %arg1, <immed> can always be transformed into +;; icmp ult i16 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
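;; Editor's illustration (not part of the original test): the rewrite the
;; note above describes turns, e.g.,
;;   %A = icmp ule i16 %arg1, 510
;; into the canonical
;;   %A = icmp ult i16 %arg1, 511
;; so only the ult form needs dedicated selection patterns.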
+ +define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, -1 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, -512 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sge i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sge i16 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sge i16 %arg1, <immed> can always be transformed into +;; icmp sgt i16 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, -512 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, -1 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_slt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sle i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sle i16 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sle i16 %arg1, <immed> can always be transformed into +;; icmp slt i16 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
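A minimal sketch of the selb operand orders the RUN lines of icmp16.ll grep for, using the register conventions from the file's header ($3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2); the instruction schedule here is illustrative, not the backend's verbatim output:

; icmp_eq_select_i16 -- "positive": take %val1 where the compare is true
;   ceqh   $3, $3, $4      ; per-halfword mask: all ones where %arg1 == %arg2
;   selb   $3, $6, $5, $3  ; selb rt,ra,rb,rc takes rb ($5 = %val1) where rc is 1
; icmp_ne_select_i16 -- "negative": same compare, selb operands swapped
;   ceqh   $3, $3, $4
;   selb   $3, $5, $6, $3  ; takes $6 (= %val2) where the equality mask is 1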
+ diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll new file mode 100644 index 000000000000..4f74b0dd0429 --- /dev/null +++ b/test/CodeGen/CellSPU/icmp32.ll @@ -0,0 +1,350 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep ila %t1.s | count 6 +; RUN: grep ceq %t1.s | count 28 +; RUN: grep ceqi %t1.s | count 12 +; RUN: grep clgt %t1.s | count 16 +; RUN: grep clgti %t1.s | count 6 +; RUN: grep cgt %t1.s | count 16 +; RUN: grep cgti %t1.s | count 6 +; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 20 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 +; $3 = %arg1, $4 = %val1, $5 = %val2 +; +; For "positive" comparisons: +; selb $3, $6, $5, <i1> +; selb $3, $5, $4, <i1> +; +; For "negative" comparisons, i.e., those where the result of the comparison +; must be inverted (setne, for example): +; selb $3, $5, $6, <i1> +; selb $3, $4, $5, <i1> + +; i32 integer comparisons: +define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, -512 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, -1 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, -512 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, -1 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 
%val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, 4294966784 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, 4294967293 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp uge i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp uge i32 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp uge i32 %arg1, <immed> can always be transformed into +;; icmp ugt i32 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, 4294966784 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, 4294967293 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ule i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_ule_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ule i32 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp ule i32 %arg1, <immed> can always be transformed into +;; icmp ult i32 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
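;; A related caveat (editor's note): the "+1" rewrite cannot apply at the
;; type's maximum value; icmp ule i32 %arg1, 4294967295 is simply always
;; true, and the optimizer folds it to a constant rather than forming an
;; icmp ult with a wrapped immediate.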
+ +define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, 4294966784 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, 4294967293 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sge i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sge i32 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sge i32 %arg1, <immed> can always be transformed into +;; icmp sgt i32 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, -512 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, -1 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sle i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sle i32 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sle i32 %arg1, <immed> can always be transformed into +;; icmp slt i32 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
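SPU has no 64-bit integer compare instruction, and the RUN counts in icmp64.ll below (ceq/cgt/clgt word compares plus gb, fsm, and selb) are consistent with i64 comparisons being synthesized from 32-bit word operations. As a sketch of the standard double-word identity involved (the textbook decomposition, not necessarily the exact DAG the backend builds):

; unsigned i64 greater-than expressed in terms of its 32-bit halves
define i1 @ugt_i64_by_words(i32 %hi1, i32 %lo1, i32 %hi2, i32 %lo2) {
entry:
  %hi_gt = icmp ugt i32 %hi1, %hi2   ; high words decide when they differ
  %hi_eq = icmp eq i32 %hi1, %hi2
  %lo_gt = icmp ugt i32 %lo1, %lo2   ; low words break the tie
  %lo_part = and i1 %hi_eq, %lo_gt
  %result = or i1 %hi_gt, %lo_part
  ret i1 %result
}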
+ diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll new file mode 100644 index 000000000000..b26252cedb30 --- /dev/null +++ b/test/CodeGen/CellSPU/icmp64.ll @@ -0,0 +1,146 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep ceq %t1.s | count 20 +; RUN: grep cgti %t1.s | count 12 +; RUN: grep cgt %t1.s | count 16 +; RUN: grep clgt %t1.s | count 12 +; RUN: grep gb %t1.s | count 12 +; RUN: grep fsm %t1.s | count 10 +; RUN: grep xori %t1.s | count 5 +; RUN: grep selb %t1.s | count 18 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 +; $3 = %arg1, $4 = %val1, $5 = %val2 +; +; i64 integer comparisons: +define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp eq i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp eq i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ne i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ne i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ugt i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ugt i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp uge i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp uge i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ult i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ult i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ule i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ule i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sgt i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sgt i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sge i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sge i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) 
nounwind { +entry: + %A = icmp slt i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp slt i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sle i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sle i64 %arg1, %arg2 + ret i1 %A +} diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll new file mode 100644 index 000000000000..d246481f03a1 --- /dev/null +++ b/test/CodeGen/CellSPU/icmp8.ll @@ -0,0 +1,286 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep ceqb %t1.s | count 24 +; RUN: grep ceqbi %t1.s | count 12 +; RUN: grep clgtb %t1.s | count 11 +; RUN: grep cgtb %t1.s | count 13 +; RUN: grep cgtbi %t1.s | count 5 +; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 11 +; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 4 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 +; $3 = %arg1, $4 = %val1, $5 = %val2 +; +; For "positive" comparisons: +; selb $3, $6, $5, <i1> +; selb $3, $5, $4, <i1> +; +; For "negative" comparisons, i.e., those where the result of the comparison +; must be inverted (setne, for example): +; selb $3, $5, $6, <i1> +; selb $3, $4, $5, <i1> + +; i8 integer comparisons: +define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, 127 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, -128 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, -1 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, 127 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, -128 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, -1 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ugt i8 %arg1, 
%arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ugt i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ugt i8 %arg1, 126 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp uge i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp uge i8 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp uge i8 %arg1, <immed> can always be transformed into +;; icmp ugt i8 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ult i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ult i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ult i8 %arg1, 253 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ult i8 %arg1, 129 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ule i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ule i8 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp ule i8 %arg1, <immed> can always be transformed into +;; icmp ult i8 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, 96 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, -1 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, -128 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sge i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sge i8 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sge i8 %arg1, <immed> can always be transformed into +;; icmp sgt i8 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
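+
+;; Editorial illustration (hypothetical code, comment only): an sge form like
+;;
+;;   %A = icmp sge i8 %arg1, 97
+;;
+;; is rewritten to "icmp sgt i8 %arg1, 96" before selection, which is exactly
+;; the comparison already checked by @icmp_sgt_immed01_i8 above.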
+ +define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, 96 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, -120 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, -1 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sle i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sle i8 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sle i8 %arg1, <immed> can always be transformed into +;; icmp slt i8 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll new file mode 100644 index 000000000000..9a461cbb85a6 --- /dev/null +++ b/test/CodeGen/CellSPU/immed16.ll @@ -0,0 +1,40 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep "ilh" %t1.s | count 11 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i16 @test_1() { + %x = alloca i16, align 16 + store i16 419, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_2() { + %x = alloca i16, align 16 + store i16 1023, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_3() { + %x = alloca i16, align 16 + store i16 -1023, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_4() { + %x = alloca i16, align 16 + store i16 32767, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_5() { + %x = alloca i16, align 16 + store i16 -32768, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_6() { + ret i16 0 +} + + diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll new file mode 100644 index 000000000000..bf471b1eb1ce --- /dev/null +++ b/test/CodeGen/CellSPU/immed32.ll @@ -0,0 +1,72 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep ilhu %t1.s | count 8 +; RUN: grep iohl %t1.s | count 6 +; RUN: grep -w il %t1.s | count 3 +; RUN: grep 16429 %t1.s | count 1 +; RUN: grep 63572 %t1.s | count 1 +; RUN: grep 128 %t1.s | count 1 +; RUN: grep 32639 %t1.s | count 1 +; RUN: grep 65535 %t1.s | count 1 +; RUN: grep 16457 %t1.s | count 1 +; RUN: grep 4059 %t1.s | count 1 +; RUN: grep 49077 %t1.s | count 1 +; RUN: grep 1267 %t1.s | count 2 +; RUN: grep 16309 %t1.s | count 1 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i32 @test_1() { + ret i32 4784128 ;; ILHU via pattern (0x49000) +} + +define i32 @test_2() { + ret i32 5308431 ;; ILHU/IOHL via pattern (0x5100f) +} + +define i32 @test_3() { + ret i32 511 ;; IL via pattern +} + +define i32 @test_4() { + ret i32 
-512 ;; IL via pattern +} + +;; double float floatval +;; 0x4005bf0a80000000 0x402d|f854 2.718282 +define float @float_const_1() { + ret float 0x4005BF0A80000000 ;; ILHU/IOHL +} + +;; double float floatval +;; 0x3810000000000000 0x0080|0000 0.000000 +define float @float_const_2() { + ret float 0x3810000000000000 ;; IL 128 +} + +;; double float floatval +;; 0x47efffffe0000000 0x7f7f|ffff 3.402823e+38 +define float @float_const_3() { + ret float 0x47EFFFFFE0000000 ;; ILHU/IOHL via pattern +} + +;; double float floatval +;; 0x400921fb60000000 0x4049|0fdb 3.141593 +define float @float_const_4() { + ret float 0x400921FB60000000 ;; ILHU/IOHL via pattern +} + +;; double float floatval +;; 0xbff6a09e60000000 0xbfb5|04f3 -1.414214 +define float @float_const_5() { + ret float 0xBFF6A09E60000000 ;; ILHU/IOHL via pattern +} + +;; double float floatval +;; 0x3ff6a09e60000000 0x3fb5|04f3 1.414214 +define float @float_const_6() { + ret float 0x3FF6A09E60000000 ;; ILHU/IOHL via pattern +} + +define float @float_const_7() { + ret float 0.000000e+00 ;; IL 0 via pattern +} diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll new file mode 100644 index 000000000000..bbda3ff329cb --- /dev/null +++ b/test/CodeGen/CellSPU/immed64.ll @@ -0,0 +1,95 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep lqa %t1.s | count 13 +; RUN: grep ilhu %t1.s | count 15 +; RUN: grep ila %t1.s | count 1 +; RUN: grep -w il %t1.s | count 6 +; RUN: grep shufb %t1.s | count 13 +; RUN: grep 65520 %t1.s | count 1 +; RUN: grep 43981 %t1.s | count 1 +; RUN: grep 13702 %t1.s | count 1 +; RUN: grep 28225 %t1.s | count 1 +; RUN: grep 30720 %t1.s | count 1 +; RUN: grep 3233857728 %t1.s | count 8 +; RUN: grep 2155905152 %t1.s | count 6 +; RUN: grep 66051 %t1.s | count 7 +; RUN: grep 471670303 %t1.s | count 11 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202) +; 18446744073709551591 => 0x ffffffff ffffffe7 (-25) +; 18446744073708516742 => 0x ffffffff fff03586 (-1034874) +; 5308431 => 0x 00000000 0051000F +; 9223372038704560128 => 0x 80000000 6e417800 + +define i64 @i64_const_1() { + ret i64 1311768467750121234 ;; Constant pool spill +} + +define i64 @i64_const_2() { + ret i64 18446744073709551591 ;; IL/SHUFB +} + +define i64 @i64_const_3() { + ret i64 18446744073708516742 ;; ILHU/IOHL/SHUFB +} + +define i64 @i64_const_4() { + ret i64 5308431 ;; ILHU/IOHL/SHUFB +} + +define i64 @i64_const_5() { + ret i64 511 ;; IL/SHUFB +} + +define i64 @i64_const_6() { + ret i64 -512 ;; IL/SHUFB +} + +define i64 @i64_const_7() { + ret i64 9223372038704560128 ;; ILHU/IOHL/SHUFB +} + +define i64 @i64_const_8() { + ret i64 0 ;; IL +} + +define i64 @i64_const_9() { + ret i64 -1 ;; IL +} + +define i64 @i64_const_10() { + ret i64 281470681808895 ;; IL 65535 +} + +; 0x4005bf0a8b145769 -> +; (ILHU 0x4005 [16389]/IOHL 0xbf0a [48906]) +; (ILHU 0x8b14 [35604]/IOHL 0x5769 [22377]) +define double @f64_const_1() { + ret double 0x4005bf0a8b145769 ;; ILHU/IOHL via pattern +} + +define double @f64_const_2() { + ret double 0x0010000000000000 +} + +define double @f64_const_3() { + ret double 0x7fefffffffffffff +} + +define double @f64_const_4() { + ret double 0x400921fb54442d18 +} + +define double @f64_const_5() { + ret double 0xbff6a09e667f3bcd ;; ILHU/IOHL via pattern +} + +define double @f64_const_6() { + ret double 0x3ff6a09e667f3bcd +} + +define double 
@f64_const_7() { + ret double 0.000000e+00 +} diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll new file mode 100644 index 000000000000..ee3076594ad6 --- /dev/null +++ b/test/CodeGen/CellSPU/int2fp.ll @@ -0,0 +1,41 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep csflt %t1.s | count 5 +; RUN: grep cuflt %t1.s | count 1 +; RUN: grep xshw %t1.s | count 2 +; RUN: grep xsbh %t1.s | count 1 +; RUN: grep and %t1.s | count 2 +; RUN: grep andi %t1.s | count 1 +; RUN: grep ila %t1.s | count 1 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define float @sitofp_i32(i32 %arg1) { + %A = sitofp i32 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @uitofp_u32(i32 %arg1) { + %A = uitofp i32 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @sitofp_i16(i16 %arg1) { + %A = sitofp i16 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @uitofp_i16(i16 %arg1) { + %A = uitofp i16 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @sitofp_i8(i8 %arg1) { + %A = sitofp i8 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @uitofp_i8(i8 %arg1) { + %A = uitofp i8 %arg1 to float ; <float> [#uses=1] + ret float %A +} diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll new file mode 100644 index 000000000000..87ad18211a25 --- /dev/null +++ b/test/CodeGen/CellSPU/intrinsics_branch.ll @@ -0,0 +1,150 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep ceq %t1.s | count 30 +; RUN: grep ceqb %t1.s | count 10 +; RUN: grep ceqhi %t1.s | count 5 +; RUN: grep ceqi %t1.s | count 5 +; RUN: grep cgt %t1.s | count 30 +; RUN: grep cgtb %t1.s | count 10 +; RUN: grep cgthi %t1.s | count 5 +; RUN: grep cgti %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) + +declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8) + + + +define <4 x i32> @test(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @ceqhtest(<8 x 
i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @ceqitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @ceqhitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @ceqbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @cgtitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @cgthitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @cgtbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @clgtitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @clgthitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @clgtbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll new file mode 100644 index 000000000000..c18f8deb385e --- /dev/null +++ b/test/CodeGen/CellSPU/intrinsics_float.ll @@ -0,0 +1,94 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep fa %t1.s | count 5 +; RUN: grep fs %t1.s | count 5 +; RUN: grep fm %t1.s | count 15 +; RUN: grep fceq %t1.s | count 5 +; RUN: grep fcmeq %t1.s | count 5 +; RUN: grep fcgt %t1.s | count 5 +; RUN: grep fcmgt %t1.s | count 5 +; RUN: grep fma %t1.s | count 5 +; RUN: grep fnms %t1.s | count 5 +; 
RUN: grep fms %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) + +declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>) + +define <4 x i32> @test(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll new file mode 100644 index 000000000000..843340b74542 --- /dev/null +++ b/test/CodeGen/CellSPU/intrinsics_logical.ll @@ -0,0 +1,49 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep and %t1.s | count 20 +; RUN: 
grep andc %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>) + +define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @anditest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @andhitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll new file mode 100644 index 000000000000..3b9746c8080a --- /dev/null +++ b/test/CodeGen/CellSPU/loads.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep {lqd.*0(\$3)} %t1.s | count 1 +; RUN: grep {lqd.*16(\$3)} %t1.s | count 1 + +; ModuleID = 'loads.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly { +entry: + %tmp1 = load <4 x float>* %a + ret <4 x float> %tmp1 +} + +define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly { +entry: + %arrayidx = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1] + %tmp1 = load <4 x float>* %arrayidx ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp1 +} diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll new file mode 100644 index 000000000000..085ce555dc25 --- /dev/null +++ b/test/CodeGen/CellSPU/mul_ops.ll @@ -0,0 +1,89 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep mpy %t1.s | count 44 +; RUN: grep mpyu %t1.s | count 4 +; RUN: grep mpyh %t1.s | count 10 +; RUN: grep mpyhh %t1.s | count 2 +; RUN: grep rotma %t1.s | count 12 +; RUN: grep rotmahi %t1.s | count 4 +; RUN: grep and %t1.s | count 2 +; RUN: grep selb %t1.s | count 6 +; RUN: grep fsmbi %t1.s | count 4 +; RUN: grep shli %t1.s | count 4 +; RUN: grep shlhi %t1.s | count 4 +; RUN: grep ila %t1.s | count 2 +; RUN: grep xsbh %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" 
+target triple = "spu" + +; 32-bit multiply instruction generation: +define <4 x i32> @mpy_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { +entry: + %A = mul <4 x i32> %arg1, %arg2 + ret <4 x i32> %A +} + +define <4 x i32> @mpy_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { +entry: + %A = mul <4 x i32> %arg2, %arg1 + ret <4 x i32> %A +} + +define <8 x i16> @mpy_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { +entry: + %A = mul <8 x i16> %arg1, %arg2 + ret <8 x i16> %A +} + +define <8 x i16> @mpy_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { +entry: + %A = mul <8 x i16> %arg2, %arg1 + ret <8 x i16> %A +} + +define <16 x i8> @mul_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { +entry: + %A = mul <16 x i8> %arg2, %arg1 + ret <16 x i8> %A +} + +define <16 x i8> @mul_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { +entry: + %A = mul <16 x i8> %arg1, %arg2 + ret <16 x i8> %A +} + +define i32 @mul_i32_1(i32 %arg1, i32 %arg2) { +entry: + %A = mul i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @mul_i32_2(i32 %arg1, i32 %arg2) { +entry: + %A = mul i32 %arg1, %arg2 + ret i32 %A +} + +define i16 @mul_i16_1(i16 %arg1, i16 %arg2) { +entry: + %A = mul i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @mul_i16_2(i16 %arg1, i16 %arg2) { +entry: + %A = mul i16 %arg1, %arg2 + ret i16 %A +} + +define i8 @mul_i8_1(i8 %arg1, i8 %arg2) { +entry: + %A = mul i8 %arg2, %arg1 + ret i8 %A +} + +define i8 @mul_i8_2(i8 %arg1, i8 %arg2) { +entry: + %A = mul i8 %arg1, %arg2 + ret i8 %A +} diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll new file mode 100644 index 000000000000..841a3ec54d6f --- /dev/null +++ b/test/CodeGen/CellSPU/nand.ll @@ -0,0 +1,121 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep nand %t1.s | count 90 +; RUN: grep and %t1.s | count 94 +; RUN: grep xsbh %t1.s | count 2 +; RUN: grep xshw %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1] + %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %B +} + +define <4 x i32> @nand_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %B +} + +define <8 x i16> @nand_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1] + %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %B +} + +define <8 x i16> @nand_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg1, %arg2 ; <<8 x i16>> [#uses=1] + %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %B +} + +define <16 x i8> @nand_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1] + %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + ret <16 x i8> %B +} + +define <16 x i8> @nand_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg1, %arg2 ; <<16 x i8>> [#uses=1] + %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + ret <16 x i8> %B +} + +define i32 @nand_i32_1(i32 %arg1, i32 %arg2) { 
+ %A = and i32 %arg2, %arg1 ; <i32> [#uses=1] + %B = xor i32 %A, -1 ; <i32> [#uses=1] + ret i32 %B +} + +define i32 @nand_i32_2(i32 %arg1, i32 %arg2) { + %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] + %B = xor i32 %A, -1 ; <i32> [#uses=1] + ret i32 %B +} + +define i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) signext { + %A = and i16 %arg2, %arg1 ; <i16> [#uses=1] + %B = xor i16 %A, -1 ; <i16> [#uses=1] + ret i16 %B +} + +define i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) signext { + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %B = xor i16 %A, -1 ; <i16> [#uses=1] + ret i16 %B +} + +define i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { + %A = and i16 %arg2, %arg1 ; <i16> [#uses=1] + %B = xor i16 %A, -1 ; <i16> [#uses=1] + ret i16 %B +} + +define i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %B = xor i16 %A, -1 ; <i16> [#uses=1] + ret i16 %B +} + +define i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %A = and i8 %arg2, %arg1 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) signext { + %A = and i8 %arg2, %arg1 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) signext { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_3(i8 %arg1, i8 %arg2) { + %A = and i8 %arg2, %arg1 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_4(i8 %arg1, i8 %arg2) { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll new file mode 100644 index 000000000000..4e9da8f12972 --- /dev/null +++ b/test/CodeGen/CellSPU/or_ops.ll @@ -0,0 +1,264 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep and %t1.s | count 2 +; RUN: grep orc %t1.s | count 85 +; RUN: grep ori %t1.s | count 30 +; RUN: grep orhi %t1.s | count 30 +; RUN: grep orbi %t1.s | count 15 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; OR instruction generation: +define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = or <4 x i32> %arg1, %arg2 + ret <4 x i32> %A +} + +define <4 x i32> @or_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = or <4 x i32> %arg2, %arg1 + ret <4 x i32> %A +} + +define <8 x i16> @or_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = or <8 x i16> %arg1, %arg2 + ret <8 x i16> %A +} + +define <8 x i16> @or_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = or <8 x i16> %arg2, %arg1 + ret <8 x i16> %A +} + +define <16 x i8> @or_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = or <16 x i8> %arg2, %arg1 + ret <16 x i8> %A +} + +define <16 x i8> @or_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = or <16 x i8> %arg1, %arg2 + ret <16 x i8> %A +} + +define i32 @or_i32_1(i32 %arg1, i32 %arg2) { + %A = or i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @or_i32_2(i32 %arg1, i32 %arg2) { + %A = or i32 %arg1, %arg2 + ret i32 %A +} + +define i16 @or_i16_1(i16 %arg1, i16 %arg2) { + %A = or i16 %arg2, %arg1 + ret i16 %A +} + 
+define i16 @or_i16_2(i16 %arg1, i16 %arg2) { + %A = or i16 %arg1, %arg2 + ret i16 %A +} + +define i8 @or_i8_1(i8 %arg1, i8 %arg2) { + %A = or i8 %arg2, %arg1 + ret i8 %A +} + +define i8 @or_i8_2(i8 %arg1, i8 %arg2) { + %A = or i8 %arg1, %arg2 + ret i8 %A +} + +; ORC instruction generation: +define <4 x i32> @orc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = or <4 x i32> %arg1, %A + ret <4 x i32> %B +} + +define <4 x i32> @orc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = or <4 x i32> %arg2, %A + ret <4 x i32> %B +} + +define <4 x i32> @orc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = or <4 x i32> %A, %arg2 + ret <4 x i32> %B +} + +define <8 x i16> @orc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = or <8 x i16> %arg1, %A + ret <8 x i16> %B +} + +define <8 x i16> @orc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = or <8 x i16> %arg2, %A + ret <8 x i16> %B +} + +define <16 x i8> @orc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = or <16 x i8> %arg2, %A + ret <16 x i8> %B +} + +define <16 x i8> @orc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = or <16 x i8> %arg1, %A + ret <16 x i8> %B +} + +define <16 x i8> @orc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = or <16 x i8> %A, %arg1 + ret <16 x i8> %B +} + +define i32 @orc_i32_1(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg2, -1 + %B = or i32 %A, %arg1 + ret i32 %B +} + +define i32 @orc_i32_2(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg1, -1 + %B = or i32 %A, %arg2 + ret i32 %B +} + +define i32 @orc_i32_3(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg2, -1 + %B = or i32 %arg1, %A + ret i32 %B +} + +define i16 @orc_i16_1(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B = or i16 %A, %arg1 + ret i16 %B +} + +define i16 @orc_i16_2(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg1, -1 + %B = or i16 %A, %arg2 + ret i16 %B +} + +define i16 @orc_i16_3(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B = or i16 %arg1, %A + ret i16 %B +} + +define i8 @orc_i8_1(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = or i8 %A, %arg1 + ret i8 %B +} + +define i8 @orc_i8_2(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg1, -1 + %B = or i8 %A, %arg2 + ret i8 %B +} + +define i8 @orc_i8_3(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = or i8 %arg1, %A + ret i8 %B +} + +; ORI instruction generation (i32 data type): +define <4 x i32> @ori_v4i32_1(<4 x i32> %in) { + %tmp2 = or <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @ori_v4i32_2(<4 x i32> %in) { + %tmp2 = or <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @ori_v4i32_3(<4 x i32> %in) { + %tmp2 = or <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %tmp2 
+} + +define <4 x i32> @ori_v4i32_4(<4 x i32> %in) { + %tmp2 = or <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 > + ret <4 x i32> %tmp2 +} + +define i32 @ori_u32(i32 zeroext %in) zeroext { + %tmp37 = or i32 %in, 37 ; <i32> [#uses=1] + ret i32 %tmp37 +} + +define i32 @ori_i32(i32 signext %in) signext { + %tmp38 = or i32 %in, 37 ; <i32> [#uses=1] + ret i32 %tmp38 +} + +; ORHI instruction generation (i16 data type): +define <8 x i16> @orhi_v8i16_1(<8 x i16> %in) { + %tmp2 = or <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511, + i16 511, i16 511, i16 511, i16 511 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @orhi_v8i16_2(<8 x i16> %in) { + %tmp2 = or <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510, + i16 510, i16 510, i16 510, i16 510 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @orhi_v8i16_3(<8 x i16> %in) { + %tmp2 = or <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @orhi_v8i16_4(<8 x i16> %in) { + %tmp2 = or <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512, + i16 -512, i16 -512, i16 -512, i16 -512 > + ret <8 x i16> %tmp2 +} + +define i16 @orhi_u16(i16 zeroext %in) zeroext { + %tmp37 = or i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp37 +} + +define i16 @orhi_i16(i16 signext %in) signext { + %tmp38 = or i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp38 +} + +; ORBI instruction generation (i8 data type): +define <16 x i8> @orbi_v16i8(<16 x i8> %in) { + %tmp2 = or <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42 > + ret <16 x i8> %tmp2 +} + +define i8 @orbi_u8(i8 zeroext %in) zeroext { + %tmp37 = or i8 %in, 37 ; <i8> [#uses=1] + ret i8 %tmp37 +} + +define i8 @orbi_i8(i8 signext %in) signext { + %tmp38 = or i8 %in, 37 ; <i8> [#uses=1] + ret i8 %tmp38 +} diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll new file mode 100644 index 000000000000..91567ce82803 --- /dev/null +++ b/test/CodeGen/CellSPU/private.ll @@ -0,0 +1,22 @@ +; Test to make sure that the 'private' is used correctly. +; +; RUN: llvm-as < %s | llc -march=cellspu > %t +; RUN: grep .Lfoo: %t +; RUN: grep brsl.*\.Lfoo %t +; RUN: grep .Lbaz: %t +; RUN: grep ila.*\.Lbaz %t + + +declare void @foo() + +define private void @foo() { + ret void +} + +@baz = private global i32 4; + +define i32 @bar() { + call void @foo() + %1 = load i32* @baz, align 4 + ret i32 %1 +} diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll new file mode 100644 index 000000000000..e308172486a5 --- /dev/null +++ b/test/CodeGen/CellSPU/rotate_ops.ll @@ -0,0 +1,160 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu -f -o %t1.s +; RUN: grep rot %t1.s | count 85 +; RUN: grep roth %t1.s | count 8 +; RUN: grep roti.*5 %t1.s | count 1 +; RUN: grep roti.*27 %t1.s | count 1 +; RUN grep rothi.*5 %t1.s | count 2 +; RUN grep rothi.*11 %t1.s | count 1 +; RUN grep rothi.*,.3 %t1.s | count 1 +; RUN: grep andhi %t1.s | count 4 +; RUN: grep shlhi %t1.s | count 4 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; Vector rotates are not currently supported in gcc or llvm assembly. These are +; not tested. 
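+
+;; Editorial sketch (register numbers are an assumption, shown as a comment):
+;; every rotate below is written with the usual shift/or idiom, e.g. a
+;; rotate left by 5 is
+;;
+;;   %B = shl i32 %A, 5
+;;   %C = lshr i32 %A, 27
+;;   %D = or i32 %B, %C
+;;
+;; which the backend should collapse to a single rotate-immediate such as
+;; "roti $3, $3, 5"; that is what the "grep roti.*5" RUN line above counts.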
+ +; 32-bit rotates: +define i32 @rotl32_1a(i32 %arg1, i8 %arg2) { + %tmp1 = zext i8 %arg2 to i32 ; <i32> [#uses=1] + %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1] + %arg22 = sub i8 32, %arg2 ; <i8> [#uses=1] + %tmp2 = zext i8 %arg22 to i32 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotl32_1b(i32 %arg1, i16 %arg2) { + %tmp1 = zext i16 %arg2 to i32 ; <i32> [#uses=1] + %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1] + %arg22 = sub i16 32, %arg2 ; <i8> [#uses=1] + %tmp2 = zext i16 %arg22 to i32 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotl32_2(i32 %arg1, i32 %arg2) { + %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1] + %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotl32_3(i32 %arg1, i32 %arg2) { + %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1] + %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotl32_4(i32 %arg1, i32 %arg2) { + %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1] + %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotr32_1(i32 %A, i8 %Amt) { + %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1] + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = shl i32 %A, %tmp2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotr32_2(i32 %A, i8 %Amt) { + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1] + %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = shl i32 %A, %tmp2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +; Rotate left with immediate +define i32 @rotli32(i32 %A) { + %B = shl i32 %A, 5 ; <i32> [#uses=1] + %C = lshr i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +; Rotate right with immediate +define i32 @rotri32(i32 %A) { + %B = lshr i32 %A, 5 ; <i32> [#uses=1] + %C = shl i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +; 16-bit rotates: +define i16 @rotr16_1(i16 %arg1, i8 %arg) { + %tmp1 = zext i8 %arg to i16 ; <i16> [#uses=1] + %B = lshr i16 %arg1, %tmp1 ; <i16> [#uses=1] + %arg2 = sub i8 16, %arg ; <i8> [#uses=1] + %tmp2 = zext i8 %arg2 to i16 ; <i16> [#uses=1] + %C = shl i16 %arg1, %tmp2 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i16 @rotr16_2(i16 %arg1, i16 %arg) { + %B = lshr i16 %arg1, %arg ; <i16> [#uses=1] + %tmp1 = sub i16 16, %arg ; <i16> [#uses=1] + %C = shl i16 %arg1, %tmp1 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i16 @rotli16(i16 %A) { + %B = shl i16 %A, 5 ; <i16> [#uses=1] + %C = lshr i16 %A, 11 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i16 @rotri16(i16 %A) { + %B = lshr i16 %A, 5 ; <i16> [#uses=1] + %C = shl i16 %A, 11 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i8 @rotl8(i8 %A, i8 %Amt) { + %B = shl i8 %A, %Amt ; <i8> [#uses=1] + %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1] + %C = lshr i8 %A, %Amt2 ; 
<i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define i8 @rotr8(i8 %A, i8 %Amt) { + %B = lshr i8 %A, %Amt ; <i8> [#uses=1] + %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1] + %C = shl i8 %A, %Amt2 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define i8 @rotli8(i8 %A) { + %B = shl i8 %A, 5 ; <i8> [#uses=1] + %C = lshr i8 %A, 3 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define i8 @rotri8(i8 %A) { + %B = lshr i8 %A, 5 ; <i8> [#uses=1] + %C = shl i8 %A, 3 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll new file mode 100644 index 000000000000..e83e47606c28 --- /dev/null +++ b/test/CodeGen/CellSPU/select_bits.ll @@ -0,0 +1,569 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep selb %t1.s | count 56 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v2i64 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rC, %rB + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rB, %rC + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %C = and <2 x i64> %rB, %rC + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %C = and <2 x i64> %rC, %rB + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rC, %rB + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rB, %rC + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %C = and <2 x i64> %rB, %rC + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %C = and <2 x i64> %rC, %rB + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + 
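+
+;; Editorial note (register assignment is an assumption): all eight v2i64
+;; variants above compute the same bitwise select (rC & rB) | (~rC & rA),
+;; so each should reduce to a single
+;;
+;;   selb $3, $3, $4, $5
+;;
+;; with $3 = %rA, $4 = %rB, $5 = %rC, matching SELB's per-bit definition
+;; rt = (rc & rb) | (~rc & ra); the "grep selb ... count 56" RUN line at the
+;; top counts one selb for each of the 56 functions in this file.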
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v4i32 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rC, %rB + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %rA + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rB, %rC + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %rA + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %rA + %C = and <4 x i32> %rB, %rC + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %A, %rA + %C = and <4 x i32> %rC, %rB + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rC, %rB + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rB, %rC + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %C = and <4 x i32> %rB, %rC + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %C = and <4 x i32> %rC, %rB + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v8i16 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rC, %rB + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rB, %rC + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, 
i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %C = and <8 x i16> %rB, %rC + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %C = and <8 x i16> %rC, %rB + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rC, %rB + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rB, %rC + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %C = and <8 x i16> %rB, %rC + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %C = and <8 x i16> %rC, %rB + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v16i8 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rC, %rB + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rB, %rC + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %C = and <16 x i8> %rB, %rC + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %C = and <16 x i8> %rC, %rB + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rC, rB), (and 
rA, (not rC))) +define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rC, %rB + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rB, %rC + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %C = and <16 x i8> %rB, %rC + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %C = and <16 x i8> %rC, %rB + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; i32 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rC, %rB + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rB, %rC + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %C = and i32 %rB, %rC + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %C = and i32 %rC, %rB + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rC, %rB + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rB, %rC + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %C = and i32 %rB, %rC + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %C = and i32 %rC, %rB + %D = or i32 %C, %B + ret i32 %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; i16 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not 
rC), rA)) +define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rC, %rB + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rB, %rC + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %C = and i16 %rB, %rC + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %C = and i16 %rC, %rB + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rC, %rB + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rB, %rC + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %C = and i16 %rB, %rC + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %C = and i16 %rC, %rB + %D = or i16 %C, %B + ret i16 %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; i8 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rC, %rB + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rB, %rC + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %C = and i8 %rB, %rC + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %C = and i8 %rC, %rB + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rC, %rB + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rB, %rC + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %C = and i8 %rB, %rC + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %C = and i8 %rC, %rB + %D = or i8 %C, %B + ret i8 %D +} diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll new file mode 100644 index 000000000000..3c26baa7c7ab --- 
/dev/null +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -0,0 +1,283 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep {shlh } %t1.s | count 9 +; RUN: grep {shlhi } %t1.s | count 3 +; RUN: grep {shl } %t1.s | count 9 +; RUN: grep {shli } %t1.s | count 3 +; RUN: grep {xshw } %t1.s | count 5 +; RUN: grep {and } %t1.s | count 5 +; RUN: grep {andi } %t1.s | count 2 +; RUN: grep {rotmi } %t1.s | count 2 +; RUN: grep {rotqmbyi } %t1.s | count 1 +; RUN: grep {rotqmbii } %t1.s | count 2 +; RUN: grep {rotqmby } %t1.s | count 1 +; RUN: grep {rotqmbi } %t1.s | count 1 +; RUN: grep {rotqbyi } %t1.s | count 1 +; RUN: grep {rotqbii } %t1.s | count 2 +; RUN: grep {rotqbybi } %t1.s | count 1 +; RUN: grep {sfi } %t1.s | count 3 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; Vector shifts are not currently supported in gcc or llvm assembly. These are +; not tested. + +; Shift left i16 via register, note that the second operand to shl is promoted +; to a 32-bit type: + +define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) { + %A = shl i16 %arg1, %arg2 + ret i16 %A +} + +define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) { + %A = shl i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) signext { + %A = shl i16 %arg1, %arg2 + ret i16 %A +} + +define i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) signext { + %A = shl i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { + %A = shl i16 %arg1, %arg2 + ret i16 %A +} + +define i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { + %A = shl i16 %arg2, %arg1 + ret i16 %A +} + +; Shift left i16 with immediate: +define i16 @shlhi_i16_1(i16 %arg1) { + %A = shl i16 %arg1, 12 + ret i16 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i16 @shlhi_i16_2(i16 %arg1) { + %A = shl i16 %arg1, 0 + ret i16 %A +} + +define i16 @shlhi_i16_3(i16 %arg1) { + %A = shl i16 16383, %arg1 + ret i16 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i16 @shlhi_i16_4(i16 %arg1) { + %A = shl i16 0, %arg1 + ret i16 %A +} + +define i16 @shlhi_i16_5(i16 signext %arg1) signext { + %A = shl i16 %arg1, 12 + ret i16 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i16 @shlhi_i16_6(i16 signext %arg1) signext { + %A = shl i16 %arg1, 0 + ret i16 %A +} + +define i16 @shlhi_i16_7(i16 signext %arg1) signext { + %A = shl i16 16383, %arg1 + ret i16 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i16 @shlhi_i16_8(i16 signext %arg1) signext { + %A = shl i16 0, %arg1 + ret i16 %A +} + +define i16 @shlhi_i16_9(i16 zeroext %arg1) zeroext { + %A = shl i16 %arg1, 12 + ret i16 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i16 @shlhi_i16_10(i16 zeroext %arg1) zeroext { + %A = shl i16 %arg1, 0 + ret i16 %A +} + +define i16 @shlhi_i16_11(i16 zeroext %arg1) zeroext { + %A = shl i16 16383, %arg1 + ret i16 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i16 @shlhi_i16_12(i16 zeroext %arg1) zeroext { + %A = shl i16 0, %arg1 + ret i16 %A +} + +; Shift left i32 via register, note that the second operand to shl is promoted +; to a 32-bit type: + +define i32 @shl_i32_1(i32 %arg1, i32 %arg2) { + %A = shl i32 %arg1, %arg2 + ret i32 %A +} + +define i32 @shl_i32_2(i32 %arg1, i32 %arg2) { + %A = shl i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @shl_i32_3(i32 signext 
%arg1, i32 signext %arg2) signext { + %A = shl i32 %arg1, %arg2 + ret i32 %A +} + +define i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) signext { + %A = shl i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) zeroext { + %A = shl i32 %arg1, %arg2 + ret i32 %A +} + +define i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) zeroext { + %A = shl i32 %arg2, %arg1 + ret i32 %A +} + +; Shift left i32 with immediate: +define i32 @shli_i32_1(i32 %arg1) { + %A = shl i32 %arg1, 12 + ret i32 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i32 @shli_i32_2(i32 %arg1) { + %A = shl i32 %arg1, 0 + ret i32 %A +} + +define i32 @shli_i32_3(i32 %arg1) { + %A = shl i32 16383, %arg1 + ret i32 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i32 @shli_i32_4(i32 %arg1) { + %A = shl i32 0, %arg1 + ret i32 %A +} + +define i32 @shli_i32_5(i32 signext %arg1) signext { + %A = shl i32 %arg1, 12 + ret i32 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i32 @shli_i32_6(i32 signext %arg1) signext { + %A = shl i32 %arg1, 0 + ret i32 %A +} + +define i32 @shli_i32_7(i32 signext %arg1) signext { + %A = shl i32 16383, %arg1 + ret i32 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i32 @shli_i32_8(i32 signext %arg1) signext { + %A = shl i32 0, %arg1 + ret i32 %A +} + +define i32 @shli_i32_9(i32 zeroext %arg1) zeroext { + %A = shl i32 %arg1, 12 + ret i32 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i32 @shli_i32_10(i32 zeroext %arg1) zeroext { + %A = shl i32 %arg1, 0 + ret i32 %A +} + +define i32 @shli_i32_11(i32 zeroext %arg1) zeroext { + %A = shl i32 16383, %arg1 + ret i32 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i32 @shli_i32_12(i32 zeroext %arg1) zeroext { + %A = shl i32 0, %arg1 + ret i32 %A +} + +;; i64 shift left + +define i64 @shl_i64_1(i64 %arg1) { + %A = shl i64 %arg1, 9 + ret i64 %A +} + +define i64 @shl_i64_2(i64 %arg1) { + %A = shl i64 %arg1, 3 + ret i64 %A +} + +define i64 @shl_i64_3(i64 %arg1, i32 %shift) { + %1 = zext i32 %shift to i64 + %2 = shl i64 %arg1, %1 + ret i64 %2 +} + +;; i64 shift right logical (shift 0s from the right) + +define i64 @lshr_i64_1(i64 %arg1) { + %1 = lshr i64 %arg1, 9 + ret i64 %1 +} + +define i64 @lshr_i64_2(i64 %arg1) { + %1 = lshr i64 %arg1, 3 + ret i64 %1 +} + +define i64 @lshr_i64_3(i64 %arg1, i32 %shift) { + %1 = zext i32 %shift to i64 + %2 = lshr i64 %arg1, %1 + ret i64 %2 +} + +;; i64 shift right arithmetic (shift 1s from the right) + +define i64 @ashr_i64_1(i64 %arg) { + %1 = ashr i64 %arg, 9 + ret i64 %1 +} + +define i64 @ashr_i64_2(i64 %arg) { + %1 = ashr i64 %arg, 3 + ret i64 %1 +} + +define i64 @ashr_i64_3(i64 %arg1, i32 %shift) { + %1 = zext i32 %shift to i64 + %2 = ashr i64 %arg1, %1 + ret i64 %2 +} + +define i32 @hi32_i64(i64 %arg) { + %1 = lshr i64 %arg, 32 + %2 = trunc i64 %1 to i32 + ret i32 %2 +} diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll new file mode 100644 index 000000000000..df3baef85c9d --- /dev/null +++ b/test/CodeGen/CellSPU/sp_farith.ll @@ -0,0 +1,90 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu -enable-unsafe-fp-math > %t1.s +; RUN: grep fa %t1.s | count 2 +; RUN: grep fs %t1.s | count 2 +; RUN: grep fm %t1.s | count 6 +; RUN: grep fma %t1.s | count 2 +; RUN: grep fms %t1.s | count 2 +; RUN: grep fnms %t1.s | count 3 +; +; This file includes standard floating point arithmetic instructions +; NOTE fdiv is tested separately since 
it is a compound operation +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define float @fp_add(float %arg1, float %arg2) { + %A = add float %arg1, %arg2 ; <float> [#uses=1] + ret float %A +} + +define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) { + %A = add <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A +} + +define float @fp_sub(float %arg1, float %arg2) { + %A = sub float %arg1, %arg2 ; <float> [#uses=1] + ret float %A +} + +define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) { + %A = sub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A +} + +define float @fp_mul(float %arg1, float %arg2) { + %A = mul float %arg1, %arg2 ; <float> [#uses=1] + ret float %A +} + +define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) { + %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A +} + +define float @fp_mul_add(float %arg1, float %arg2, float %arg3) { + %A = mul float %arg1, %arg2 ; <float> [#uses=1] + %B = add float %A, %arg3 ; <float> [#uses=1] + ret float %B +} + +define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { + %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + %B = add <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] + ret <4 x float> %B +} + +define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) { + %A = mul float %arg1, %arg2 ; <float> [#uses=1] + %B = sub float %A, %arg3 ; <float> [#uses=1] + ret float %B +} + +define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { + %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + %B = sub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] + ret <4 x float> %B +} + +; Test the straightforward way of getting fnms +; c - a * b +define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) { + %A = mul float %arg1, %arg2 + %B = sub float %arg3, %A + ret float %B +} + +; Test another way of getting fnms +; - ( a *b -c ) = c - a * b +define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) { + %A = mul float %arg1, %arg2 + %B = sub float %A, %arg3 + %C = sub float -0.0, %B + ret float %C +} + +define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { + %A = mul <4 x float> %arg1, %arg2 + %B = sub <4 x float> %A, %arg3 + %D = sub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B + ret <4 x float> %D +} diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll new file mode 100644 index 000000000000..f2f35ef4dbc4 --- /dev/null +++ b/test/CodeGen/CellSPU/stores.ll @@ -0,0 +1,151 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep {stqd.*0(\$3)} %t1.s | count 4 +; RUN: grep {stqd.*16(\$3)} %t1.s | count 4 +; RUN: grep 16256 %t1.s | count 2 +; RUN: grep 16384 %t1.s | count 1 +; RUN: grep 771 %t1.s | count 4 +; RUN: grep 515 %t1.s | count 2 +; RUN: grep 1799 %t1.s | count 2 +; RUN: grep 1543 %t1.s | count 5 +; RUN: grep 1029 %t1.s | count 3 +; RUN: grep {shli.*, 4} %t1.s | count 4 +; RUN: grep stqx %t1.s | count 4 +; RUN: grep ilhu %t1.s | count 11 +; RUN: grep iohl %t1.s | count 8 +; RUN: grep shufb %t1.s | count 15 +; RUN: grep frds %t1.s | count 1 + +; ModuleID = 'stores.bc' +target datalayout = 
"E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define void @store_v16i8_1(<16 x i8>* %a) nounwind { +entry: + store <16 x i8> < i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1 >, <16 x i8>* %a + ret void +} + +define void @store_v16i8_2(<16 x i8>* %a) nounwind { +entry: + %arrayidx = getelementptr <16 x i8>* %a, i32 1 + store <16 x i8> < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >, <16 x i8>* %arrayidx + ret void +} + +define void @store_v16i8_3(<16 x i8>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <16 x i8>* %a, i32 %i + store <16 x i8> < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >, <16 x i8>* %arrayidx + ret void +} + +define void @store_v8i16_1(<8 x i16>* %a) nounwind { +entry: + store <8 x i16> < i16 1, i16 2, i16 1, i16 1, i16 1, i16 2, i16 1, i16 1 >, <8 x i16>* %a + ret void +} + +define void @store_v8i16_2(<8 x i16>* %a) nounwind { +entry: + %arrayidx = getelementptr <8 x i16>* %a, i16 1 + store <8 x i16> < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >, <8 x i16>* %arrayidx + ret void +} + +define void @store_v8i16_3(<8 x i16>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <8 x i16>* %a, i32 %i + store <8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16>* %arrayidx + ret void +} + +define void @store_v4i32_1(<4 x i32>* %a) nounwind { +entry: + store <4 x i32> < i32 1, i32 2, i32 1, i32 1 >, <4 x i32>* %a + ret void +} + +define void @store_v4i32_2(<4 x i32>* %a) nounwind { +entry: + %arrayidx = getelementptr <4 x i32>* %a, i32 1 + store <4 x i32> < i32 2, i32 2, i32 2, i32 2 >, <4 x i32>* %arrayidx + ret void +} + +define void @store_v4i32_3(<4 x i32>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <4 x i32>* %a, i32 %i + store <4 x i32> < i32 1, i32 1, i32 1, i32 1 >, <4 x i32>* %arrayidx + ret void +} + +define void @store_v4f32_1(<4 x float>* %a) nounwind { +entry: + store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a + ret void +} + +define void @store_v4f32_2(<4 x float>* %a) nounwind { +entry: + %arrayidx = getelementptr <4 x float>* %a, i32 1 + store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx + ret void +} + +define void @store_v4f32_3(<4 x float>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <4 x float>* %a, i32 %i + store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx + ret void +} + +; Test truncating stores: + +define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind { +entry: + %conv = trunc i16 %val to i8 + store i8 %conv, i8* %dest + ret i8 %conv +} + +define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind { +entry: + %conv = trunc i32 %val to i8 + store i8 %conv, i8* %dest + ret i8 %conv +} + +define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind { +entry: + %conv = trunc i32 %val to i16 + store i16 %conv, i16* %dest + ret i16 %conv +} + +define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind { +entry: + %conv = trunc i64 %val to i8 + store i8 %conv, i8* %dest + ret i8 %conv +} + +define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind { +entry: + %conv = trunc i64 
%val to i16 + store i16 %conv, i16* %dest + ret i16 %conv +} + +define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind { +entry: + %conv = trunc i64 %val to i32 + store i32 %conv, i32* %dest + ret i32 %conv +} + +define float @tstore_f64_f32(double %val, float* %dest) nounwind { +entry: + %conv = fptrunc double %val to float + store float %conv, float* %dest + ret float %conv +} diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll new file mode 100644 index 000000000000..82d319dd1050 --- /dev/null +++ b/test/CodeGen/CellSPU/struct_1.ll @@ -0,0 +1,144 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s +; RUN: grep lqa %t1.s | count 5 +; RUN: grep lqd %t1.s | count 11 +; RUN: grep rotqbyi %t1.s | count 7 +; RUN: grep xshw %t1.s | count 1 +; RUN: grep andi %t1.s | count 5 +; RUN: grep cbd %t1.s | count 3 +; RUN: grep chd %t1.s | count 1 +; RUN: grep cwd %t1.s | count 3 +; RUN: grep shufb %t1.s | count 7 +; RUN: grep stqd %t1.s | count 7 +; RUN: grep iohl %t2.s | count 16 +; RUN: grep ilhu %t2.s | count 16 +; RUN: grep lqd %t2.s | count 16 +; RUN: grep rotqbyi %t2.s | count 7 +; RUN: grep xshw %t2.s | count 1 +; RUN: grep andi %t2.s | count 5 +; RUN: grep cbd %t2.s | count 3 +; RUN: grep chd %t2.s | count 1 +; RUN: grep cwd %t2.s | count 3 +; RUN: grep shufb %t2.s | count 7 +; RUN: grep stqd %t2.s | count 7 + +; ModuleID = 'struct_1.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; struct hackstate { +; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3) +; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3) +; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3) +; int i1; // offset 4 (rotate left by 4 bytes to byte 0) +; short s1; // offset 8 (rotate left by 6 bytes to byte 2) +; int i2; // offset 12 [ignored] +; unsigned char c4; // offset 16 [ignored] +; unsigned char c5; // offset 17 [ignored] +; unsigned char c6; // offset 18 (rotate left by 14 bytes to byte 3) +; unsigned char c7; // offset 19 (no rotate, in preferred slot) +; int i3; // offset 20 [ignored] +; int i4; // offset 24 [ignored] +; int i5; // offset 28 [ignored] +; int i6; // offset 32 (no rotate, in preferred slot) +; } +%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 } + +; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +@state = global %struct.hackstate zeroinitializer, align 16 + +define i8 @get_hackstate_c1() zeroext nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret i8 %tmp2 +} + +define i8 @get_hackstate_c2() zeroext nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret i8 %tmp2 +} + +define i8 @get_hackstate_c3() zeroext nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret i8 %tmp2 +} + +define i32 @get_hackstate_i1() nounwind { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret i32 %tmp2 +} + +define i16 @get_hackstate_s1() signext nounwind { +entry: + %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret i16 %tmp2 +} + +define i8 @get_hackstate_c6() zeroext nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, 
i32 0, i32 8), align 16 + ret i8 %tmp2 +} + +define i8 @get_hackstate_c7() zeroext nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 + ret i8 %tmp2 +} + +define i32 @get_hackstate_i3() nounwind { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret i32 %tmp2 +} + +define i32 @get_hackstate_i6() nounwind { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret i32 %tmp2 +} + +define void @set_hackstate_c1(i8 zeroext %c) nounwind { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret void +} + +define void @set_hackstate_c2(i8 zeroext %c) nounwind { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret void +} + +define void @set_hackstate_c3(i8 zeroext %c) nounwind { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret void +} + +define void @set_hackstate_i1(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret void +} + +define void @set_hackstate_s1(i16 signext %s) nounwind { +entry: + store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret void +} + +define void @set_hackstate_i3(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret void +} + +define void @set_hackstate_i6(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret void +} diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll new file mode 100644 index 000000000000..db22564f4341 --- /dev/null +++ b/test/CodeGen/CellSPU/trunc.ll @@ -0,0 +1,94 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep shufb %t1.s | count 19 +; RUN: grep {ilhu.*1799} %t1.s | count 1 +; RUN: grep {ilhu.*771} %t1.s | count 2 +; RUN: grep {ilhu.*1543} %t1.s | count 1 +; RUN: grep {ilhu.*1029} %t1.s | count 1 +; RUN: grep {ilhu.*515} %t1.s | count 1 +; RUN: grep {ilhu.*3855} %t1.s | count 1 +; RUN: grep {ilhu.*3599} %t1.s | count 1 +; RUN: grep {ilhu.*3085} %t1.s | count 1 +; RUN: grep {iohl.*3855} %t1.s | count 1 +; RUN: grep {iohl.*3599} %t1.s | count 2 +; RUN: grep {iohl.*1543} %t1.s | count 2 +; RUN: grep {iohl.*771} %t1.s | count 2 +; RUN: grep {iohl.*515} %t1.s | count 1 +; RUN: grep {iohl.*1799} %t1.s | count 1 +; RUN: grep lqa %t1.s | count 1 +; RUN: grep cbd %t1.s | count 4 +; RUN: grep chd %t1.s | count 3 +; RUN: grep cwd %t1.s | count 1 +; RUN: grep cdd %t1.s | count 1 + +; ModuleID = 'trunc.bc' +target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128" +target triple = "spu" + +define <16 x i8> @trunc_i128_i8(i128 %u, <16 x i8> %v) { +entry: + %0 = trunc i128 %u to i8 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 15 + ret <16 x i8> %tmp1 +} + +define <8 x i16> @trunc_i128_i16(i128 %u, <8 x i16> %v) { +entry: + %0 = trunc i128 %u to i16 + %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 8 + ret <8 x i16> %tmp1 +} + +define <4 x i32> @trunc_i128_i32(i128 %u, <4 x i32> %v) { +entry: + %0 = trunc i128 %u to i32 + %tmp1 = insertelement <4 x i32> %v, i32 %0, i32 2 + ret <4 x i32> %tmp1 +} + +define <2 x i64> @trunc_i128_i64(i128 %u, <2 x i64> %v) { +entry: + %0 = trunc i128 %u to i64 + %tmp1 = insertelement 
<2 x i64> %v, i64 %0, i32 1 + ret <2 x i64> %tmp1 +} + +define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) { +entry: + %0 = trunc i64 %u to i8 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10 + ret <16 x i8> %tmp1 +} + +define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) { +entry: + %0 = trunc i64 %u to i16 + %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6 + ret <8 x i16> %tmp1 +} + +define i32 @trunc_i64_i32(i64 %u) { +entry: + %0 = trunc i64 %u to i32 + ret i32 %0 +} + +define <16 x i8> @trunc_i32_i8(i32 %u, <16 x i8> %v) { +entry: + %0 = trunc i32 %u to i8 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 7 + ret <16 x i8> %tmp1 +} + +define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) { +entry: + %0 = trunc i32 %u to i16 + %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3 + ret <8 x i16> %tmp1 +} + +define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) { +entry: + %0 = trunc i16 %u to i8 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5 + ret <16 x i8> %tmp1 +} diff --git a/test/CodeGen/CellSPU/useful-harnesses/README.txt b/test/CodeGen/CellSPU/useful-harnesses/README.txt new file mode 100644 index 000000000000..d87b3989e4f7 --- /dev/null +++ b/test/CodeGen/CellSPU/useful-harnesses/README.txt @@ -0,0 +1,5 @@ +This directory contains code that's not part of the DejaGNU test suite, +but is generally useful as various test harnesses. + +vecoperations.c: Various vector operation sanity checks, e.g., shuffles, + 8-bit vector add and multiply. diff --git a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c new file mode 100644 index 000000000000..12fc30bf65d7 --- /dev/null +++ b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c @@ -0,0 +1,69 @@ +#include <stdio.h> + +typedef unsigned int uint32_t; +typedef int int32_t; + +const char *boolstring(int val) { + return val ? "true" : "false"; +} + +int i32_eq(int32_t a, int32_t b) { + return (a == b); +} + +int i32_neq(int32_t a, int32_t b) { + return (a != b); +} + +int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) { + return ((a == b) ? c : d); +} + +int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) { + return ((a != b) ? c : d); +} + +struct pred_s { + const char *name; + int (*predfunc)(int32_t, int32_t); + int (*selfunc)(int32_t, int32_t, int32_t, int32_t); +}; + +struct pred_s preds[] = { + { "eq", i32_eq, i32_eq_select }, + { "neq", i32_neq, i32_neq_select } +}; + +int main(void) { + int i; + int32_t a = 1234567890; + int32_t b = 345678901; + int32_t c = 1234500000; + int32_t d = 10001; + int32_t e = 10000; + + printf("a = %12d (0x%08x)\n", a, a); + printf("b = %12d (0x%08x)\n", b, b); + printf("c = %12d (0x%08x)\n", c, c); + printf("d = %12d (0x%08x)\n", d, d); + printf("e = %12d (0x%08x)\n", e, e); + printf("----------------------------------------\n"); + + for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { + printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); + printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); + printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b))); + printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c))); + printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e))); + printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e))); + + printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d)); + printf("a %s a ? 
c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c)); + printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d)); + printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d)); + + printf("----------------------------------------\n"); + } + + return 0; +} diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c new file mode 100644 index 000000000000..b613bd872e28 --- /dev/null +++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c @@ -0,0 +1,673 @@ +#include <stdio.h> +#include "i64operations.h" + +int64_t tval_a = 1234567890003LL; +int64_t tval_b = 2345678901235LL; +int64_t tval_c = 1234567890001LL; +int64_t tval_d = 10001LL; +int64_t tval_e = 10000LL; +uint64_t tval_f = 0xffffff0750135eb9; +int64_t tval_g = -1; + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +int +i64_eq(int64_t a, int64_t b) +{ + return (a == b); +} + +int +i64_neq(int64_t a, int64_t b) +{ + return (a != b); +} + +int +i64_gt(int64_t a, int64_t b) +{ + return (a > b); +} + +int +i64_le(int64_t a, int64_t b) +{ + return (a <= b); +} + +int +i64_ge(int64_t a, int64_t b) { + return (a >= b); +} + +int +i64_lt(int64_t a, int64_t b) { + return (a < b); +} + +int +i64_uge(uint64_t a, uint64_t b) +{ + return (a >= b); +} + +int +i64_ult(uint64_t a, uint64_t b) +{ + return (a < b); +} + +int +i64_ugt(uint64_t a, uint64_t b) +{ + return (a > b); +} + +int +i64_ule(uint64_t a, uint64_t b) +{ + return (a <= b); +} + +int64_t +i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d) +{ + return ((a == b) ? c : d); +} + +int64_t +i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d) +{ + return ((a != b) ? c : d); +} + +int64_t +i64_gt_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a > b) ? c : d); +} + +int64_t +i64_le_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a <= b) ? c : d); +} + +int64_t +i64_ge_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a >= b) ? c : d); +} + +int64_t +i64_lt_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a < b) ? c : d); +} + +uint64_t +i64_ugt_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) +{ + return ((a > b) ? c : d); +} + +uint64_t +i64_ule_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) +{ + return ((a <= b) ? c : d); +} + +uint64_t +i64_uge_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { + return ((a >= b) ? c : d); +} + +uint64_t +i64_ult_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { + return ((a < b) ? 
c : d); +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +struct harness_int64_pred int64_tests_eq[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} +}; + +struct harness_int64_pred int64_tests_neq[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} +}; + +struct harness_int64_pred int64_tests_sgt[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} +}; + +struct harness_int64_pred int64_tests_sle[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} +}; + +struct harness_int64_pred int64_tests_sge[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} +}; + +struct harness_int64_pred int64_tests_slt[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} +}; + +struct int64_pred_s int64_preds[] = { + {"eq", i64_eq, i64_eq_select, + int64_tests_eq, ARR_SIZE(int64_tests_eq)}, + {"neq", i64_neq, i64_neq_select, + int64_tests_neq, ARR_SIZE(int64_tests_neq)}, + {"gt", i64_gt, i64_gt_select, + int64_tests_sgt, ARR_SIZE(int64_tests_sgt)}, + {"le", i64_le, i64_le_select, + int64_tests_sle, ARR_SIZE(int64_tests_sle)}, + {"ge", i64_ge, i64_ge_select, + int64_tests_sge, ARR_SIZE(int64_tests_sge)}, + {"lt", i64_lt, i64_lt_select, + int64_tests_slt, ARR_SIZE(int64_tests_slt)} +}; + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +struct harness_uint64_pred uint64_tests_ugt[] = { + {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d }, + {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, 
(uint64_t *) &tval_c }, + {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c }, + {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d } +}; + +struct harness_uint64_pred uint64_tests_ule[] = { + {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c} +}; + +struct harness_uint64_pred uint64_tests_uge[] = { + {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c} +}; + +struct harness_uint64_pred uint64_tests_ult[] = { + {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d} +}; + +struct uint64_pred_s uint64_preds[] = { + {"ugt", i64_ugt, i64_ugt_select, + uint64_tests_ugt, ARR_SIZE(uint64_tests_ugt)}, + {"ule", i64_ule, i64_ule_select, + uint64_tests_ule, ARR_SIZE(uint64_tests_ule)}, + {"uge", i64_uge, i64_uge_select, + uint64_tests_uge, ARR_SIZE(uint64_tests_uge)}, + {"ult", i64_ult, i64_ult_select, + uint64_tests_ult, ARR_SIZE(uint64_tests_ult)} +}; + +int +compare_expect_int64(const struct int64_pred_s * pred) +{ + int j, failed = 0; + + for (j = 0; j < pred->n_tests; ++j) { + int pred_result; + + pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs); + + if (pred_result != pred->tests[j].expected) { + char str[64]; + + sprintf(str, pred->tests[j].fmt_string, pred->name); + printf("%s: returned value is %d, expecting %d\n", str, + pred_result, pred->tests[j].expected); + printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs, + *pred->tests[j].lhs); + printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs, + *pred->tests[j].rhs); + ++failed; + } else { + int64_t 
selresult;
+
+      selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs,
+                                   *pred->tests[j].select_a,
+                                   *pred->tests[j].select_b);
+
+      if (selresult != *pred->tests[j].select_expected) {
+        char str[64];
+
+        sprintf(str, pred->tests[j].fmt_string, pred->name);
+        printf("%s select: returned value is %lld, expecting %lld\n", str,
+               selresult, *pred->tests[j].select_expected);
+        printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs,
+               *pred->tests[j].lhs);
+        printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs,
+               *pred->tests[j].rhs);
+        printf(" true = %19lld (0x%016llx)\n", *pred->tests[j].select_a,
+               *pred->tests[j].select_a);
+        printf(" false = %19lld (0x%016llx)\n", *pred->tests[j].select_b,
+               *pred->tests[j].select_b);
+        ++failed;
+      }
+    }
+  }
+
+  printf(" %d tests performed, should be %d.\n", j, pred->n_tests);
+
+  return failed;
+}
+
+int
+compare_expect_uint64(const struct uint64_pred_s * pred)
+{
+  int j, failed = 0;
+
+  for (j = 0; j < pred->n_tests; ++j) {
+    int pred_result;
+
+    pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs);
+    if (pred_result != pred->tests[j].expected) {
+      char str[64];
+
+      sprintf(str, pred->tests[j].fmt_string, pred->name);
+      printf("%s: returned value is %d, expecting %d\n", str,
+             pred_result, pred->tests[j].expected);
+      printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs,
+             *pred->tests[j].lhs);
+      printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs,
+             *pred->tests[j].rhs);
+      ++failed;
+    } else {
+      uint64_t selresult;
+
+      selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs,
+                                   *pred->tests[j].select_a,
+                                   *pred->tests[j].select_b);
+      if (selresult != *pred->tests[j].select_expected) {
+        char str[64];
+
+        sprintf(str, pred->tests[j].fmt_string, pred->name);
+        printf("%s select: returned value is %llu, expecting %llu\n", str,
+               selresult, *pred->tests[j].select_expected);
+        printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs,
+               *pred->tests[j].lhs);
+        printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs,
+               *pred->tests[j].rhs);
+        printf(" true = %19llu (0x%016llx)\n", *pred->tests[j].select_a,
+               *pred->tests[j].select_a);
+        printf(" false = %19llu (0x%016llx)\n", *pred->tests[j].select_b,
+               *pred->tests[j].select_b);
+        ++failed;
+      }
+    }
+  }
+
+  printf(" %d tests performed, should be %d.\n", j, pred->n_tests);
+
+  return failed;
+}
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
+int
+test_i64_sext_i32(int in, int64_t expected) {
+  int64_t result = (int64_t) in;
+
+  if (result != expected) {
+    char str[64];
+    sprintf(str, "i64_sext_i32(%d) returns %lld\n", in, result);
+    return 1;
+  }
+
+  return 0;
+}
+
+int
+test_i64_sext_i16(short in, int64_t expected) {
+  int64_t result = (int64_t) in;
+
+  if (result != expected) {
+    char str[64];
+    sprintf(str, "i64_sext_i16(%hd) returns %lld\n", in, result);
+    return 1;
+  }
+
+  return 0;
+}
+
+int
+test_i64_sext_i8(signed char in, int64_t expected) {
+  int64_t result = (int64_t) in;
+
+  if (result != expected) {
+    char str[64];
+    sprintf(str, "i64_sext_i8(%d) returns %lld\n", in, result);
+    return 1;
+  }
+
+  return 0;
+}
+
+int
+test_i64_zext_i32(unsigned int in, uint64_t expected) {
+  uint64_t result = (uint64_t) in;
+
+  if (result != expected) {
+    char str[64];
+    sprintf(str, "i64_zext_i32(%u) returns %llu\n", in, result);
+    return 1;
+  }
+
+  return 0;
+}
+
+int
+test_i64_zext_i16(unsigned short in, uint64_t expected) {
+  uint64_t result = (uint64_t) in;
+
+  if (result != expected) {
+    char str[64];
+    sprintf(str, 
"i64_zext_i16(%hu) returns %llu\n", in, result); + return 1; + } + + return 0; +} + +int +test_i64_zext_i8(unsigned char in, uint64_t expected) { + uint64_t result = (uint64_t) in; + + if (result != expected) { + char str[64]; + sprintf(str, "i64_zext_i8(%u) returns %llu\n", in, result); + return 1; + } + + return 0; +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +int64_t +i64_shl_const(int64_t a) { + return a << 10; +} + +int64_t +i64_shl(int64_t a, int amt) { + return a << amt; +} + +uint64_t +u64_shl_const(uint64_t a) { + return a << 10; +} + +uint64_t +u64_shl(uint64_t a, int amt) { + return a << amt; +} + +int64_t +i64_srl_const(int64_t a) { + return a >> 10; +} + +int64_t +i64_srl(int64_t a, int amt) { + return a >> amt; +} + +uint64_t +u64_srl_const(uint64_t a) { + return a >> 10; +} + +uint64_t +u64_srl(uint64_t a, int amt) { + return a >> amt; +} + +int64_t +i64_sra_const(int64_t a) { + return a >> 10; +} + +int64_t +i64_sra(int64_t a, int amt) { + return a >> amt; +} + +uint64_t +u64_sra_const(uint64_t a) { + return a >> 10; +} + +uint64_t +u64_sra(uint64_t a, int amt) { + return a >> amt; +} + +int +test_u64_constant_shift(const char *func_name, uint64_t (*func)(uint64_t), uint64_t a, uint64_t expected) { + uint64_t result = (*func)(a); + + if (result != expected) { + printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected); + return 1; + } + + return 0; +} + +int +test_i64_constant_shift(const char *func_name, int64_t (*func)(int64_t), int64_t a, int64_t expected) { + int64_t result = (*func)(a); + + if (result != expected) { + printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected); + return 1; + } + + return 0; +} + +int +test_u64_variable_shift(const char *func_name, uint64_t (*func)(uint64_t, int), uint64_t a, unsigned int b, uint64_t expected) { + uint64_t result = (*func)(a, b); + + if (result != expected) { + printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected); + return 1; + } + + return 0; +} + +int +test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), int64_t a, unsigned int b, int64_t expected) { + int64_t result = (*func)(a, b); + + if (result != expected) { + printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected); + return 1; + } + + return 0; +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +int64_t i64_mul(int64_t a, int64_t b) { + return a * b; +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +int +main(void) +{ + int i, j, failed = 0; + const char *something_failed = " %d tests failed.\n"; + const char *all_tests_passed = " All tests passed.\n"; + + printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a); + printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b); + printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c); + printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d); + printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e); + printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f); + printf("tval_g = %20llu (0x%016llx)\n", tval_g, tval_g); + printf("----------------------------------------\n"); + + for (i = 0; i < ARR_SIZE(int64_preds); ++i) { + printf("%s series:\n", int64_preds[i].name); + if ((failed = compare_expect_int64(int64_preds + i)) > 0) { + printf(something_failed, failed); + } else { + printf(all_tests_passed); + } + 
+ printf("----------------------------------------\n"); + } + + for (i = 0; i < ARR_SIZE(uint64_preds); ++i) { + printf("%s series:\n", uint64_preds[i].name); + if ((failed = compare_expect_uint64(uint64_preds + i)) > 0) { + printf(something_failed, failed); + } else { + printf(all_tests_passed); + } + + printf("----------------------------------------\n"); + } + + /*----------------------------------------------------------------------*/ + + puts("signed/zero-extend tests:"); + + failed = 0; + failed += test_i64_sext_i32(-1, -1LL); + failed += test_i64_sext_i32(10, 10LL); + failed += test_i64_sext_i32(0x7fffffff, 0x7fffffffLL); + failed += test_i64_sext_i16(-1, -1LL); + failed += test_i64_sext_i16(10, 10LL); + failed += test_i64_sext_i16(0x7fff, 0x7fffLL); + failed += test_i64_sext_i8(-1, -1LL); + failed += test_i64_sext_i8(10, 10LL); + failed += test_i64_sext_i8(0x7f, 0x7fLL); + + failed += test_i64_zext_i32(0xffffffff, 0x00000000ffffffffLLU); + failed += test_i64_zext_i32(0x01234567, 0x0000000001234567LLU); + failed += test_i64_zext_i16(0xffff, 0x000000000000ffffLLU); + failed += test_i64_zext_i16(0x569a, 0x000000000000569aLLU); + failed += test_i64_zext_i8(0xff, 0x00000000000000ffLLU); + failed += test_i64_zext_i8(0xa0, 0x00000000000000a0LLU); + + if (failed > 0) { + printf(" %d tests failed.\n", failed); + } else { + printf(" All tests passed.\n"); + } + + printf("----------------------------------------\n"); + + failed = 0; + puts("signed left/right shift tests:"); + failed += test_i64_constant_shift("i64_shl_const", i64_shl_const, tval_a, 0x00047dc7ec114c00LL); + failed += test_i64_variable_shift("i64_shl", i64_shl, tval_a, 10, 0x00047dc7ec114c00LL); + failed += test_i64_constant_shift("i64_srl_const", i64_srl_const, tval_a, 0x0000000047dc7ec1LL); + failed += test_i64_variable_shift("i64_srl", i64_srl, tval_a, 10, 0x0000000047dc7ec1LL); + failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_a, 0x0000000047dc7ec1LL); + failed += test_i64_variable_shift("i64_sra", i64_sra, tval_a, 10, 0x0000000047dc7ec1LL); + + if (failed > 0) { + printf(" %d tests ailed.\n", failed); + } else { + printf(" All tests passed.\n"); + } + + printf("----------------------------------------\n"); + + failed = 0; + puts("unsigned left/right shift tests:"); + failed += test_u64_constant_shift("u64_shl_const", u64_shl_const, tval_f, 0xfffc1d404d7ae400LL); + failed += test_u64_variable_shift("u64_shl", u64_shl, tval_f, 10, 0xfffc1d404d7ae400LL); + failed += test_u64_constant_shift("u64_srl_const", u64_srl_const, tval_f, 0x003fffffc1d404d7LL); + failed += test_u64_variable_shift("u64_srl", u64_srl, tval_f, 10, 0x003fffffc1d404d7LL); + failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_f, 0xffffffffc1d404d7LL); + failed += test_i64_variable_shift("i64_sra", i64_sra, tval_f, 10, 0xffffffffc1d404d7LL); + failed += test_u64_constant_shift("u64_sra_const", u64_sra_const, tval_f, 0x003fffffc1d404d7LL); + failed += test_u64_variable_shift("u64_sra", u64_sra, tval_f, 10, 0x003fffffc1d404d7LL); + + if (failed > 0) { + printf(" %d tests ailed.\n", failed); + } else { + printf(" All tests passed.\n"); + } + + printf("----------------------------------------\n"); + + int64_t result; + + result = i64_mul(tval_g, tval_g); + printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result); + result = i64_mul(tval_d, tval_e); + printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result); + /* 0xba7a664f13077c9 */ + result = i64_mul(tval_a, tval_b); + 
printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result); + + printf("----------------------------------------\n"); + + return 0; +} diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.h b/test/CodeGen/CellSPU/useful-harnesses/i64operations.h new file mode 100644 index 000000000000..7a02794cd7e0 --- /dev/null +++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.h @@ -0,0 +1,43 @@ +#define TRUE_VAL (!0) +#define FALSE_VAL 0 +#define ARR_SIZE(arr) (sizeof(arr)/sizeof(arr[0])) + +typedef unsigned long long int uint64_t; +typedef long long int int64_t; + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ +struct harness_int64_pred { + const char *fmt_string; + int64_t *lhs; + int64_t *rhs; + int64_t *select_a; + int64_t *select_b; + int expected; + int64_t *select_expected; +}; + +struct harness_uint64_pred { + const char *fmt_string; + uint64_t *lhs; + uint64_t *rhs; + uint64_t *select_a; + uint64_t *select_b; + int expected; + uint64_t *select_expected; +}; + +struct int64_pred_s { + const char *name; + int (*predfunc) (int64_t, int64_t); + int64_t (*selfunc) (int64_t, int64_t, int64_t, int64_t); + struct harness_int64_pred *tests; + int n_tests; +}; + +struct uint64_pred_s { + const char *name; + int (*predfunc) (uint64_t, uint64_t); + uint64_t (*selfunc) (uint64_t, uint64_t, uint64_t, uint64_t); + struct harness_uint64_pred *tests; + int n_tests; +}; diff --git a/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c b/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c new file mode 100644 index 000000000000..c4c86e37635d --- /dev/null +++ b/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c @@ -0,0 +1,179 @@ +#include <stdio.h> + +typedef unsigned char v16i8 __attribute__((ext_vector_type(16))); +typedef short v8i16 __attribute__((ext_vector_type(16))); +typedef int v4i32 __attribute__((ext_vector_type(4))); +typedef float v4f32 __attribute__((ext_vector_type(4))); +typedef long long v2i64 __attribute__((ext_vector_type(2))); +typedef double v2f64 __attribute__((ext_vector_type(2))); + +void print_v16i8(const char *str, const v16i8 v) { + union { + unsigned char elts[16]; + v16i8 vec; + } tv; + tv.vec = v; + printf("%s = { %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, " + "%hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, " + "%hhu, %hhu }\n", + str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5], + tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11], + tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]); +} + +void print_v16i8_hex(const char *str, const v16i8 v) { + union { + unsigned char elts[16]; + v16i8 vec; + } tv; + tv.vec = v; + printf("%s = { 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, " + "0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, " + "0x%02hhx, 0x%02hhx }\n", + str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5], + tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11], + tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]); +} + +void print_v8i16_hex(const char *str, v8i16 v) { + union { + short elts[8]; + v8i16 vec; + } tv; + tv.vec = v; + printf("%s = { 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, " + "0x%04hx, 0x%04hx, 0x%04hx }\n", + str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], + tv.elts[5], tv.elts[6], tv.elts[7]); +} + +void print_v4i32(const char *str, v4i32 v) { + printf("%s = { %d, %d, %d, %d }\n", str, v.x, v.y, v.z, v.w); +} + +void 
print_v4f32(const char *str, v4f32 v) { + printf("%s = { %f, %f, %f, %f }\n", str, v.x, v.y, v.z, v.w); +} + +void print_v2i64(const char *str, v2i64 v) { + printf("%s = { %lld, %lld }\n", str, v.x, v.y); +} + +void print_v2f64(const char *str, v2f64 v) { + printf("%s = { %g, %g }\n", str, v.x, v.y); +} + +/*----------------------------------------------------------------------*/ + +v16i8 v16i8_mpy(v16i8 v1, v16i8 v2) { + return v1 * v2; +} + +v16i8 v16i8_add(v16i8 v1, v16i8 v2) { + return v1 + v2; +} + +v4i32 v4i32_shuffle_1(v4i32 a) { + v4i32 c2 = a.yzwx; + return c2; +} + +v4i32 v4i32_shuffle_2(v4i32 a) { + v4i32 c2 = a.zwxy; + return c2; +} + +v4i32 v4i32_shuffle_3(v4i32 a) { + v4i32 c2 = a.wxyz; + return c2; +} + +v4i32 v4i32_shuffle_4(v4i32 a) { + v4i32 c2 = a.xyzw; + return c2; +} + +v4i32 v4i32_shuffle_5(v4i32 a) { + v4i32 c2 = a.xwzy; + return c2; +} + +v4f32 v4f32_shuffle_1(v4f32 a) { + v4f32 c2 = a.yzwx; + return c2; +} + +v4f32 v4f32_shuffle_2(v4f32 a) { + v4f32 c2 = a.zwxy; + return c2; +} + +v4f32 v4f32_shuffle_3(v4f32 a) { + v4f32 c2 = a.wxyz; + return c2; +} + +v4f32 v4f32_shuffle_4(v4f32 a) { + v4f32 c2 = a.xyzw; + return c2; +} + +v4f32 v4f32_shuffle_5(v4f32 a) { + v4f32 c2 = a.xwzy; + return c2; +} + +v2i64 v2i64_shuffle(v2i64 a) { + v2i64 c2 = a.yx; + return c2; +} + +v2f64 v2f64_shuffle(v2f64 a) { + v2f64 c2 = a.yx; + return c2; +} + +int main(void) { + v16i8 v00 = { 0xf4, 0xad, 0x01, 0xe9, 0x51, 0x78, 0xc1, 0x8a, + 0x94, 0x7c, 0x49, 0x6c, 0x21, 0x32, 0xb2, 0x04 }; + v16i8 va0 = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10 }; + v16i8 va1 = { 0x11, 0x83, 0x4b, 0x63, 0xff, 0x90, 0x32, 0xe5, + 0x5a, 0xaa, 0x20, 0x01, 0x0d, 0x15, 0x77, 0x05 }; + v8i16 v01 = { 0x1a87, 0x0a14, 0x5014, 0xfff0, + 0xe194, 0x0184, 0x801e, 0x5940 }; + v4i32 v1 = { 1, 2, 3, 4 }; + v4f32 v2 = { 1.0, 2.0, 3.0, 4.0 }; + v2i64 v3 = { 691043ll, 910301513ll }; + v2f64 v4 = { 5.8e56, 9.103e-62 }; + + puts("---- vector tests start ----"); + + print_v16i8_hex("v00 ", v00); + print_v16i8_hex("va0 ", va0); + print_v16i8_hex("va1 ", va1); + print_v16i8_hex("va0 x va1 ", v16i8_mpy(va0, va1)); + print_v16i8_hex("va0 + va1 ", v16i8_add(va0, va1)); + print_v8i16_hex("v01 ", v01); + + print_v4i32("v4i32_shuffle_1(1, 2, 3, 4)", v4i32_shuffle_1(v1)); + print_v4i32("v4i32_shuffle_2(1, 2, 3, 4)", v4i32_shuffle_2(v1)); + print_v4i32("v4i32_shuffle_3(1, 2, 3, 4)", v4i32_shuffle_3(v1)); + print_v4i32("v4i32_shuffle_4(1, 2, 3, 4)", v4i32_shuffle_4(v1)); + print_v4i32("v4i32_shuffle_5(1, 2, 3, 4)", v4i32_shuffle_5(v1)); + + print_v4f32("v4f32_shuffle_1(1, 2, 3, 4)", v4f32_shuffle_1(v2)); + print_v4f32("v4f32_shuffle_2(1, 2, 3, 4)", v4f32_shuffle_2(v2)); + print_v4f32("v4f32_shuffle_3(1, 2, 3, 4)", v4f32_shuffle_3(v2)); + print_v4f32("v4f32_shuffle_4(1, 2, 3, 4)", v4f32_shuffle_4(v2)); + print_v4f32("v4f32_shuffle_5(1, 2, 3, 4)", v4f32_shuffle_5(v2)); + + print_v2i64("v3 ", v3); + print_v2i64("v2i64_shuffle ", v2i64_shuffle(v3)); + print_v2f64("v4 ", v4); + print_v2f64("v2f64_shuffle ", v2f64_shuffle(v4)); + + puts("---- vector tests end ----"); + + return 0; +} diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll new file mode 100644 index 000000000000..4b29adc80921 --- /dev/null +++ b/test/CodeGen/CellSPU/vec_const.ll @@ -0,0 +1,154 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s +; RUN: grep -w il %t1.s | count 3 +; RUN: grep ilhu %t1.s | count 8 +; RUN: grep -w 
ilh %t1.s | count 5 +; RUN: grep iohl %t1.s | count 7 +; RUN: grep lqa %t1.s | count 6 +; RUN: grep 24672 %t1.s | count 2 +; RUN: grep 16429 %t1.s | count 1 +; RUN: grep 63572 %t1.s | count 1 +; RUN: grep 4660 %t1.s | count 1 +; RUN: grep 22136 %t1.s | count 1 +; RUN: grep 43981 %t1.s | count 1 +; RUN: grep 61202 %t1.s | count 1 +; RUN: grep 16393 %t1.s | count 1 +; RUN: grep 8699 %t1.s | count 1 +; RUN: grep 21572 %t1.s | count 1 +; RUN: grep 11544 %t1.s | count 1 +; RUN: grep 1311768467750121234 %t1.s | count 1 +; RUN: grep lqd %t2.s | count 6 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" +target triple = "spu-unknown-elf" + +; Vector constant load tests: + +; IL <reg>, 2 +define <4 x i32> @v4i32_constvec() { + ret <4 x i32> < i32 2, i32 2, i32 2, i32 2 > +} + +; Spill to constant pool +define <4 x i32> @v4i32_constpool() { + ret <4 x i32> < i32 2, i32 1, i32 1, i32 2 > +} + +; Max negative range for IL +define <4 x i32> @v4i32_constvec_2() { + ret <4 x i32> < i32 -32768, i32 -32768, i32 -32768, i32 -32768 > +} + +; ILHU <reg>, 73 (0x49) +; 4784128 = 0x490000 +define <4 x i32> @v4i32_constvec_3() { + ret <4 x i32> < i32 4784128, i32 4784128, + i32 4784128, i32 4784128 > +} + +; ILHU <reg>, 61 (0x3d) +; IOHL <reg>, 15395 (0x3c23) +define <4 x i32> @v4i32_constvec_4() { + ret <4 x i32> < i32 4013091, i32 4013091, + i32 4013091, i32 4013091 > +} + +; ILHU <reg>, 0x5050 (20560) +; IOHL <reg>, 0x5050 (20560) +; Tests for whether we expand the size of the bit pattern properly, because +; this could be interpreted as an i8 pattern (0x50) +define <4 x i32> @v4i32_constvec_5() { + ret <4 x i32> < i32 1347440720, i32 1347440720, + i32 1347440720, i32 1347440720 > +} + +; ILH +define <8 x i16> @v8i16_constvec_1() { + ret <8 x i16> < i16 32767, i16 32767, i16 32767, i16 32767, + i16 32767, i16 32767, i16 32767, i16 32767 > +} + +; ILH +define <8 x i16> @v8i16_constvec_2() { + ret <8 x i16> < i16 511, i16 511, i16 511, i16 511, i16 511, + i16 511, i16 511, i16 511 > +} + +; ILH +define <8 x i16> @v8i16_constvec_3() { + ret <8 x i16> < i16 -512, i16 -512, i16 -512, i16 -512, i16 -512, + i16 -512, i16 -512, i16 -512 > +} + +; ILH <reg>, 24672 (0x6060) +; Tests whether we expand the size of the bit pattern properly, because +; this could be interpreted as an i8 pattern (0x60) +define <8 x i16> @v8i16_constvec_4() { + ret <8 x i16> < i16 24672, i16 24672, i16 24672, i16 24672, i16 24672, + i16 24672, i16 24672, i16 24672 > +} + +; ILH <reg>, 24672 (0x6060) +; Tests whether we expand the size of the bit pattern properly, because +; this is an i8 pattern but has to be expanded out to i16 to load it +; properly into the vector register. 
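+; For example (a sketch of the same splat logic, not output asserted by the
+; RUN lines): splatting the byte 96 (0x60) into all sixteen i8 lanes produces
+; the same 128-bit pattern as splatting the halfword 0x6060 = 96*256 + 96 =
+; 24672 into eight i16 lanes, so a single "ILH <reg>, 24672" fills the register.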
+define <16 x i8> @v16i8_constvec_1() {
+  ret <16 x i8> < i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96,
+                  i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96 >
+}
+
+define <4 x float> @v4f32_constvec_1() {
+entry:
+  ret <4 x float> < float 0x4005BF0A80000000,
+                    float 0x4005BF0A80000000,
+                    float 0x4005BF0A80000000,
+                    float 0x4005BF0A80000000 >
+}
+
+define <4 x float> @v4f32_constvec_2() {
+entry:
+  ret <4 x float> < float 0.000000e+00,
+                    float 0.000000e+00,
+                    float 0.000000e+00,
+                    float 0.000000e+00 >
+}
+
+define <4 x float> @v4f32_constvec_3() {
+entry:
+  ret <4 x float> < float 0x4005BF0A80000000,
+                    float 0x3810000000000000,
+                    float 0x47EFFFFFE0000000,
+                    float 0x400921FB60000000 >
+}
+
+; 1311768467750121234 => 0x12345678 abcdef12
+; HI32_hi: 4660
+; HI32_lo: 22136
+; LO32_hi: 43981
+; LO32_lo: 61202
+define <2 x i64> @i64_constvec_1() {
+entry:
+  ret <2 x i64> < i64 1311768467750121234,
+                  i64 1311768467750121234 >
+}
+
+define <2 x i64> @i64_constvec_2() {
+entry:
+  ret <2 x i64> < i64 1, i64 1311768467750121234 >
+}
+
+define <2 x double> @f64_constvec_1() {
+entry:
+  ret <2 x double> < double 0x400921fb54442d18,
+                     double 0xbff6a09e667f3bcd >
+}
+
+; 0x400921fb54442d18 ->
+;   (ILHU 0x4009 [16393]/IOHL 0x21fb [ 8699])
+;   (ILHU 0x5444 [21572]/IOHL 0x2d18 [11544])
define <2 x double> @f64_constvec_2() {
+entry:
+  ret <2 x double> < double 0x400921fb54442d18,
+                     double 0x400921fb54442d18 >
+}
diff --git a/test/CodeGen/CellSPU/vecinsert.ll b/test/CodeGen/CellSPU/vecinsert.ll
new file mode 100644
index 000000000000..6abbd9ac797d
--- /dev/null
+++ b/test/CodeGen/CellSPU/vecinsert.ll
@@ -0,0 +1,120 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep cbd %t1.s | count 5
+; RUN: grep chd %t1.s | count 5
+; RUN: grep cwd %t1.s | count 10
+; RUN: grep -w il %t1.s | count 5
+; RUN: grep -w ilh %t1.s | count 6
+; RUN: grep iohl %t1.s | count 1
+; RUN: grep ilhu %t1.s | count 4
+; RUN: grep shufb %t1.s | count 26
+; RUN: grep 17219 %t1.s | count 1
+; RUN: grep 22598 %t1.s | count 1
+; RUN: grep -- -39 %t1.s | count 1
+; RUN: grep 24 %t1.s | count 1
+; RUN: grep 1159 %t1.s | count 1
+; ModuleID = 'vecinsert.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
+target triple = "spu-unknown-elf"
+
+; 67 -> 0x43, as 8-bit vector constant load = 0x4343 (17219)
+define <16 x i8> @test_v16i8(<16 x i8> %P, i8 %x) {
+entry:
+  %tmp1 = insertelement <16 x i8> %P, i8 %x, i32 10
+  %tmp1.1 = insertelement <16 x i8> %tmp1, i8 67, i32 7
+  %tmp1.2 = insertelement <16 x i8> %tmp1.1, i8 %x, i32 15
+  ret <16 x i8> %tmp1.2
+}
+
+; 22598 -> 0x5846
+define <8 x i16> @test_v8i16(<8 x i16> %P, i16 %x) {
+entry:
+  %tmp1 = insertelement <8 x i16> %P, i16 %x, i32 5
+  %tmp1.1 = insertelement <8 x i16> %tmp1, i16 22598, i32 7
+  %tmp1.2 = insertelement <8 x i16> %tmp1.1, i16 %x, i32 2
+  ret <8 x i16> %tmp1.2
+}
+
+; 1574023 -> 0x180487 (ILHU 24/IOHL 1159)
+define <4 x i32> @test_v4i32_1(<4 x i32> %P, i32 %x) {
+entry:
+  %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2
+  %tmp1.1 = insertelement <4 x i32> %tmp1, i32 1574023, i32 1
+  %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
+  ret <4 x i32> %tmp1.2
+}
+
+; Should generate IL for the load
+define <4 x i32> @test_v4i32_2(<4 x i32> %P, i32 %x) {
+entry:
+  %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2
+  %tmp1.1 = insertelement <4 x i32> %tmp1, i32 -39, i32 1
+  %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
+  ret <4 x i32> %tmp1.2
+}
+
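+; The variable_* tests below load a vector through a variable index,
+; insert one or two elements, and store the result back; these drive the
+; cbd/chd/cwd control-word generators counted in the RUN lines above.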
+define void @variable_v16i8_1(<16 x i8>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <16 x i8>* %a, i32 %i
+  %tmp2 = load <16 x i8>* %arrayidx
+  %tmp3 = insertelement <16 x i8> %tmp2, i8 1, i32 1
+  %tmp8 = insertelement <16 x i8> %tmp3, i8 2, i32 11
+  store <16 x i8> %tmp8, <16 x i8>* %arrayidx
+  ret void
+}
+
+define void @variable_v8i16_1(<8 x i16>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <8 x i16>* %a, i32 %i
+  %tmp2 = load <8 x i16>* %arrayidx
+  %tmp3 = insertelement <8 x i16> %tmp2, i16 1, i32 1
+  %tmp8 = insertelement <8 x i16> %tmp3, i16 2, i32 6
+  store <8 x i16> %tmp8, <8 x i16>* %arrayidx
+  ret void
+}
+
+define void @variable_v4i32_1(<4 x i32>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <4 x i32>* %a, i32 %i
+  %tmp2 = load <4 x i32>* %arrayidx
+  %tmp3 = insertelement <4 x i32> %tmp2, i32 1, i32 1
+  %tmp8 = insertelement <4 x i32> %tmp3, i32 2, i32 2
+  store <4 x i32> %tmp8, <4 x i32>* %arrayidx
+  ret void
+}
+
+define void @variable_v4f32_1(<4 x float>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <4 x float>* %a, i32 %i
+  %tmp2 = load <4 x float>* %arrayidx
+  %tmp3 = insertelement <4 x float> %tmp2, float 1.000000e+00, i32 1
+  %tmp8 = insertelement <4 x float> %tmp3, float 2.000000e+00, i32 2
+  store <4 x float> %tmp8, <4 x float>* %arrayidx
+  ret void
+}
+
+define void @variable_v2i64_1(<2 x i64>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <2 x i64>* %a, i32 %i
+  %tmp2 = load <2 x i64>* %arrayidx
+  %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 0
+  store <2 x i64> %tmp3, <2 x i64>* %arrayidx
+  ret void
+}
+
+define void @variable_v2i64_2(<2 x i64>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <2 x i64>* %a, i32 %i
+  %tmp2 = load <2 x i64>* %arrayidx
+  %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 1
+  store <2 x i64> %tmp3, <2 x i64>* %arrayidx
+  ret void
+}
+
+define void @variable_v2f64_1(<2 x double>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <2 x double>* %a, i32 %i
+  %tmp2 = load <2 x double>* %arrayidx
+  %tmp3 = insertelement <2 x double> %tmp2, double 1.000000e+00, i32 1
+  store <2 x double> %tmp3, <2 x double>* %arrayidx
+  ret void
+}
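A note for readers tracing the immediate values quoted in the vec_const.ll and vecinsert.ll comments above: the ILHU/IOHL pair materializes a 32-bit word by loading the upper halfword and then OR-ing in the lower halfword. The standalone C sketch below is illustrative only; it is not part of this patch, and the helper name split32 is invented here. It reproduces that split for several of the constants the RUN lines grep for.

#include <stdio.h>
#include <stdint.h>

/* Print the ILHU (upper 16 bits) and IOHL (lower 16 bits) immediates
   that together materialize one 32-bit word on the SPU. */
static void split32(uint32_t v) {
  uint16_t hi = (uint16_t)(v >> 16);    /* ILHU immediate */
  uint16_t lo = (uint16_t)(v & 0xffff); /* IOHL immediate */
  printf("0x%08x -> ILHU %5u (0x%04x) / IOHL %5u (0x%04x)\n",
         (unsigned)v, (unsigned)hi, (unsigned)hi, (unsigned)lo, (unsigned)lo);
}

int main(void) {
  split32(4013091u); /* 0x003d3c23: ILHU 61, IOHL 15395 (v4i32_constvec_4) */
  split32(4784128u); /* 0x00490000: low half is zero, so ILHU 73 alone suffices */
  split32(1574023u); /* 0x00180487: ILHU 24, IOHL 1159 (test_v4i32_1) */
  /* A 64-bit pattern is simply two 32-bit splits (cf. f64_constvec_2). */
  uint64_t d = 0x400921fb54442d18ull;
  split32((uint32_t)(d >> 32));           /* ILHU 16393 / IOHL  8699 */
  split32((uint32_t)(d & 0xffffffffull)); /* ILHU 21572 / IOHL 11544 */
  return 0;
}

Running the sketch prints exactly the decimal immediates the RUN lines count (61/15395, 73, 24/1159, 16393/8699, 21572/11544), which is why those otherwise opaque numbers appear in the grep patterns.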