Diffstat (limited to 'test/CodeGen/CellSPU')
-rw-r--r--  test/CodeGen/CellSPU/2009-01-01-BrCond.ll  31
-rw-r--r--  test/CodeGen/CellSPU/and_ops.ll  279
-rw-r--r--  test/CodeGen/CellSPU/call.ll  28
-rw-r--r--  test/CodeGen/CellSPU/call_indirect.ll  49
-rw-r--r--  test/CodeGen/CellSPU/ctpop.ll  30
-rw-r--r--  test/CodeGen/CellSPU/dg.exp  5
-rw-r--r--  test/CodeGen/CellSPU/dp_farith.ll  102
-rw-r--r--  test/CodeGen/CellSPU/eqv.ll  152
-rw-r--r--  test/CodeGen/CellSPU/extract_elt.ll  277
-rw-r--r--  test/CodeGen/CellSPU/fcmp32.ll  23
-rw-r--r--  test/CodeGen/CellSPU/fcmp64.ll  7
-rw-r--r--  test/CodeGen/CellSPU/fdiv.ll  22
-rw-r--r--  test/CodeGen/CellSPU/fneg-fabs.ll  42
-rw-r--r--  test/CodeGen/CellSPU/i64ops.ll  57
-rw-r--r--  test/CodeGen/CellSPU/i8ops.ll  25
-rw-r--r--  test/CodeGen/CellSPU/icmp16.ll  350
-rw-r--r--  test/CodeGen/CellSPU/icmp32.ll  350
-rw-r--r--  test/CodeGen/CellSPU/icmp64.ll  146
-rw-r--r--  test/CodeGen/CellSPU/icmp8.ll  286
-rw-r--r--  test/CodeGen/CellSPU/immed16.ll  40
-rw-r--r--  test/CodeGen/CellSPU/immed32.ll  72
-rw-r--r--  test/CodeGen/CellSPU/immed64.ll  95
-rw-r--r--  test/CodeGen/CellSPU/int2fp.ll  41
-rw-r--r--  test/CodeGen/CellSPU/intrinsics_branch.ll  150
-rw-r--r--  test/CodeGen/CellSPU/intrinsics_float.ll  94
-rw-r--r--  test/CodeGen/CellSPU/intrinsics_logical.ll  49
-rw-r--r--  test/CodeGen/CellSPU/loads.ll  20
-rw-r--r--  test/CodeGen/CellSPU/mul_ops.ll  89
-rw-r--r--  test/CodeGen/CellSPU/nand.ll  121
-rw-r--r--  test/CodeGen/CellSPU/or_ops.ll  264
-rw-r--r--  test/CodeGen/CellSPU/private.ll  22
-rw-r--r--  test/CodeGen/CellSPU/rotate_ops.ll  160
-rw-r--r--  test/CodeGen/CellSPU/select_bits.ll  569
-rw-r--r--  test/CodeGen/CellSPU/shift_ops.ll  283
-rw-r--r--  test/CodeGen/CellSPU/sp_farith.ll  90
-rw-r--r--  test/CodeGen/CellSPU/stores.ll  151
-rw-r--r--  test/CodeGen/CellSPU/struct_1.ll  144
-rw-r--r--  test/CodeGen/CellSPU/trunc.ll  94
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/README.txt  5
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/i32operations.c  69
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/i64operations.c  673
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/i64operations.h  43
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/vecoperations.c  179
-rw-r--r--  test/CodeGen/CellSPU/vec_const.ll  154
-rw-r--r--  test/CodeGen/CellSPU/vecinsert.ll  120
45 files changed, 6052 insertions, 0 deletions
diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
new file mode 100644
index 000000000000..75e0ed0cd2fc
--- /dev/null
+++ b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llc -march=cellspu -o - | grep brz
+; PR3274
+
+target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
+target triple = "spu"
+ %struct.anon = type { i64 }
+ %struct.fp_number_type = type { i32, i32, i32, [4 x i8], %struct.anon }
+
+define double @__floatunsidf(i32 %arg_a) nounwind {
+entry:
+ %in = alloca %struct.fp_number_type, align 16
+ %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1
+ store i32 0, i32* %0, align 4
+ %1 = icmp eq i32 %arg_a, 0
+ %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0
+ br i1 %1, label %bb, label %bb1
+
+bb: ; preds = %entry
+ store i32 2, i32* %2, align 8
+ br label %bb7
+
+bb1: ; preds = %entry
+ ret double 0.0
+
+bb7: ; preds = %bb5, %bb1, %bb
+ ret double 1.0
+}
+
+; declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+
+declare double @__pack_d(%struct.fp_number_type*)
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
new file mode 100644
index 000000000000..a18b6f8d05fc
--- /dev/null
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -0,0 +1,279 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep and %t1.s | count 234
+; RUN: grep andc %t1.s | count 85
+; RUN: grep andi %t1.s | count 37
+; RUN: grep andhi %t1.s | count 30
+; RUN: grep andbi %t1.s | count 4
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; AND instruction generation:
+define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = and <4 x i32> %arg1, %arg2
+ ret <4 x i32> %A
+}
+
+define <4 x i32> @and_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = and <4 x i32> %arg2, %arg1
+ ret <4 x i32> %A
+}
+
+define <8 x i16> @and_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = and <8 x i16> %arg1, %arg2
+ ret <8 x i16> %A
+}
+
+define <8 x i16> @and_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = and <8 x i16> %arg2, %arg1
+ ret <8 x i16> %A
+}
+
+define <16 x i8> @and_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = and <16 x i8> %arg2, %arg1
+ ret <16 x i8> %A
+}
+
+define <16 x i8> @and_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = and <16 x i8> %arg1, %arg2
+ ret <16 x i8> %A
+}
+
+define i32 @and_i32_1(i32 %arg1, i32 %arg2) {
+ %A = and i32 %arg2, %arg1
+ ret i32 %A
+}
+
+define i32 @and_i32_2(i32 %arg1, i32 %arg2) {
+ %A = and i32 %arg1, %arg2
+ ret i32 %A
+}
+
+define i16 @and_i16_1(i16 %arg1, i16 %arg2) {
+ %A = and i16 %arg2, %arg1
+ ret i16 %A
+}
+
+define i16 @and_i16_2(i16 %arg1, i16 %arg2) {
+ %A = and i16 %arg1, %arg2
+ ret i16 %A
+}
+
+define i8 @and_i8_1(i8 %arg1, i8 %arg2) {
+ %A = and i8 %arg2, %arg1
+ ret i8 %A
+}
+
+define i8 @and_i8_2(i8 %arg1, i8 %arg2) {
+ %A = and i8 %arg1, %arg2
+ ret i8 %A
+}
+
+; ANDC instruction generation:
+define <4 x i32> @andc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %B = and <4 x i32> %arg1, %A
+ ret <4 x i32> %B
+}
+
+define <4 x i32> @andc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %B = and <4 x i32> %arg2, %A
+ ret <4 x i32> %B
+}
+
+define <4 x i32> @andc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %B = and <4 x i32> %A, %arg2
+ ret <4 x i32> %B
+}
+
+define <8 x i16> @andc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %arg1, %A
+ ret <8 x i16> %B
+}
+
+define <8 x i16> @andc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %arg2, %A
+ ret <8 x i16> %B
+}
+
+define <16 x i8> @andc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %arg2, %A
+ ret <16 x i8> %B
+}
+
+define <16 x i8> @andc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %arg1, %A
+ ret <16 x i8> %B
+}
+
+define <16 x i8> @andc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %A, %arg1
+ ret <16 x i8> %B
+}
+
+define i32 @andc_i32_1(i32 %arg1, i32 %arg2) {
+ %A = xor i32 %arg2, -1
+ %B = and i32 %A, %arg1
+ ret i32 %B
+}
+
+define i32 @andc_i32_2(i32 %arg1, i32 %arg2) {
+ %A = xor i32 %arg1, -1
+ %B = and i32 %A, %arg2
+ ret i32 %B
+}
+
+define i32 @andc_i32_3(i32 %arg1, i32 %arg2) {
+ %A = xor i32 %arg2, -1
+ %B = and i32 %arg1, %A
+ ret i32 %B
+}
+
+define i16 @andc_i16_1(i16 %arg1, i16 %arg2) {
+ %A = xor i16 %arg2, -1
+ %B = and i16 %A, %arg1
+ ret i16 %B
+}
+
+define i16 @andc_i16_2(i16 %arg1, i16 %arg2) {
+ %A = xor i16 %arg1, -1
+ %B = and i16 %A, %arg2
+ ret i16 %B
+}
+
+define i16 @andc_i16_3(i16 %arg1, i16 %arg2) {
+ %A = xor i16 %arg2, -1
+ %B = and i16 %arg1, %A
+ ret i16 %B
+}
+
+define i8 @andc_i8_1(i8 %arg1, i8 %arg2) {
+ %A = xor i8 %arg2, -1
+ %B = and i8 %A, %arg1
+ ret i8 %B
+}
+
+define i8 @andc_i8_2(i8 %arg1, i8 %arg2) {
+ %A = xor i8 %arg1, -1
+ %B = and i8 %A, %arg2
+ ret i8 %B
+}
+
+define i8 @andc_i8_3(i8 %arg1, i8 %arg2) {
+ %A = xor i8 %arg2, -1
+ %B = and i8 %arg1, %A
+ ret i8 %B
+}
+
+; ANDI instruction generation (i32 data type):
+define <4 x i32> @andi_v4i32_1(<4 x i32> %in) {
+ %tmp2 = and <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @andi_v4i32_2(<4 x i32> %in) {
+ %tmp2 = and <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @andi_v4i32_3(<4 x i32> %in) {
+ %tmp2 = and <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @andi_v4i32_4(<4 x i32> %in) {
+ %tmp2 = and <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 >
+ ret <4 x i32> %tmp2
+}
+
+define i32 @andi_u32(i32 zeroext %in) zeroext {
+ %tmp37 = and i32 %in, 37
+ ret i32 %tmp37
+}
+
+define i32 @andi_i32(i32 signext %in) signext {
+ %tmp38 = and i32 %in, 37
+ ret i32 %tmp38
+}
+
+define i32 @andi_i32_1(i32 %in) {
+ %tmp37 = and i32 %in, 37
+ ret i32 %tmp37
+}
+
+; ANDHI instruction generation (i16 data type):
+define <8 x i16> @andhi_v8i16_1(<8 x i16> %in) {
+ %tmp2 = and <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511,
+ i16 511, i16 511, i16 511, i16 511 >
+ ret <8 x i16> %tmp2
+}
+
+define <8 x i16> @andhi_v8i16_2(<8 x i16> %in) {
+ %tmp2 = and <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510,
+ i16 510, i16 510, i16 510, i16 510 >
+ ret <8 x i16> %tmp2
+}
+
+define <8 x i16> @andhi_v8i16_3(<8 x i16> %in) {
+ %tmp2 = and <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1 >
+ ret <8 x i16> %tmp2
+}
+
+define <8 x i16> @andhi_v8i16_4(<8 x i16> %in) {
+ %tmp2 = and <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512,
+ i16 -512, i16 -512, i16 -512, i16 -512 >
+ ret <8 x i16> %tmp2
+}
+
+define i16 @andhi_u16(i16 zeroext %in) zeroext {
+ %tmp37 = and i16 %in, 37 ; <i16> [#uses=1]
+ ret i16 %tmp37
+}
+
+define i16 @andhi_i16(i16 signext %in) signext {
+ %tmp38 = and i16 %in, 37 ; <i16> [#uses=1]
+ ret i16 %tmp38
+}
+
+; i8 data type (should be ANDBI if 8-bit registers were supported):
+define <16 x i8> @and_v16i8(<16 x i8> %in) {
+ ; ANDBI generated for vector types
+ %tmp2 = and <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
+ i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
+ i8 42, i8 42, i8 42, i8 42 >
+ ret <16 x i8> %tmp2
+}
+
+define i8 @and_u8(i8 zeroext %in) zeroext {
+ ; ANDBI generated:
+ %tmp37 = and i8 %in, 37
+ ret i8 %tmp37
+}
+
+define i8 @and_sext8(i8 signext %in) signext {
+ ; ANDBI generated
+ %tmp38 = and i8 %in, 37
+ ret i8 %tmp38
+}
+
+define i8 @and_i8(i8 %in) {
+ ; ANDBI generated
+ %tmp38 = and i8 %in, 205
+ ret i8 %tmp38
+}
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
new file mode 100644
index 000000000000..a305a2354041
--- /dev/null
+++ b/test/CodeGen/CellSPU/call.ll
@@ -0,0 +1,28 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep brsl %t1.s | count 1
+; RUN: grep brasl %t1.s | count 1
+; RUN: grep stqd %t1.s | count 80
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define i32 @main() {
+entry:
+ %a = call i32 @stub_1(i32 1, float 0x400921FA00000000)
+ call void @extern_stub_1(i32 %a, i32 4)
+ ret i32 %a
+}
+
+declare void @extern_stub_1(i32, i32)
+
+define i32 @stub_1(i32 %x, float %y) {
+entry:
+ ret i32 0
+}
+
+; vararg call: ensure that all caller-saved registers are spilled to the
+; stack:
+define i32 @stub_2(...) {
+entry:
+ ret i32 0
+}
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
new file mode 100644
index 000000000000..9be714ebc9b8
--- /dev/null
+++ b/test/CodeGen/CellSPU/call_indirect.ll
@@ -0,0 +1,49 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: grep bisl %t1.s | count 7
+; RUN: grep ila %t1.s | count 1
+; RUN: grep rotqby %t1.s | count 6
+; RUN: grep lqa %t1.s | count 1
+; RUN: grep lqd %t1.s | count 12
+; RUN: grep dispatch_tab %t1.s | count 5
+; RUN: grep bisl %t2.s | count 7
+; RUN: grep ilhu %t2.s | count 2
+; RUN: grep iohl %t2.s | count 2
+; RUN: grep rotqby %t2.s | count 6
+; RUN: grep lqd %t2.s | count 13
+; RUN: grep ilhu %t2.s | count 2
+; RUN: grep ai %t2.s | count 9
+; RUN: grep dispatch_tab %t2.s | count 6
+
+; ModuleID = 'call_indirect.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
+target triple = "spu-unknown-elf"
+
+@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16
+
+define void @dispatcher(i32 %i_arg, float %f_arg) {
+entry:
+ %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16
+ tail call void %tmp2( i32 %i_arg, float %f_arg )
+ %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4
+ tail call void %tmp2.1( i32 %i_arg, float %f_arg )
+ %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4
+ tail call void %tmp2.2( i32 %i_arg, float %f_arg )
+ %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4
+ tail call void %tmp2.3( i32 %i_arg, float %f_arg )
+ %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4
+ tail call void %tmp2.4( i32 %i_arg, float %f_arg )
+ %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4
+ tail call void %tmp2.5( i32 %i_arg, float %f_arg )
+ ret void
+}
+
+@ptr_list = internal global [1 x void ()*] [ void ()* inttoptr (i64 4294967295 to void ()*) ], align 4
+@ptr.a = internal global void ()** getelementptr ([1 x void ()*]* @ptr_list, i32 0, i32 1), align 16
+
+define void @double_indirect_call() {
+ %a = load void ()*** @ptr.a, align 16
+ %b = load void ()** %a, align 4
+ tail call void %b()
+ ret void
+}
diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll
new file mode 100644
index 000000000000..3c7ee7aeea2b
--- /dev/null
+++ b/test/CodeGen/CellSPU/ctpop.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep cntb %t1.s | count 3
+; RUN: grep andi %t1.s | count 3
+; RUN: grep rotmi %t1.s | count 2
+; RUN: grep rothmi %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare i8 @llvm.ctpop.i8(i8)
+declare i16 @llvm.ctpop.i16(i16)
+declare i32 @llvm.ctpop.i32(i32)
+
+define i32 @test_i8(i8 %X) {
+ call i8 @llvm.ctpop.i8(i8 %X)
+ %Y = zext i8 %1 to i32
+ ret i32 %Y
+}
+
+define i32 @test_i16(i16 %X) {
+ call i16 @llvm.ctpop.i16(i16 %X)
+ %Y = zext i16 %1 to i32
+ ret i32 %Y
+}
+
+define i32 @test_i32(i32 %X) {
+ call i32 @llvm.ctpop.i32(i32 %X)
+ %Y = bitcast i32 %1 to i32
+ ret i32 %Y
+}
+
diff --git a/test/CodeGen/CellSPU/dg.exp b/test/CodeGen/CellSPU/dg.exp
new file mode 100644
index 000000000000..d41647991a0a
--- /dev/null
+++ b/test/CodeGen/CellSPU/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target CellSPU] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
new file mode 100644
index 000000000000..2579a404eea5
--- /dev/null
+++ b/test/CodeGen/CellSPU/dp_farith.ll
@@ -0,0 +1,102 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep dfa %t1.s | count 2
+; RUN: grep dfs %t1.s | count 2
+; RUN: grep dfm %t1.s | count 6
+; RUN: grep dfma %t1.s | count 2
+; RUN: grep dfms %t1.s | count 2
+; RUN: grep dfnms %t1.s | count 4
+;
+; This file includes double precision floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define double @fadd(double %arg1, double %arg2) {
+ %A = add double %arg1, %arg2
+ ret double %A
+}
+
+define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) {
+ %A = add <2 x double> %arg1, %arg2
+ ret <2 x double> %A
+}
+
+define double @fsub(double %arg1, double %arg2) {
+ %A = sub double %arg1, %arg2
+ ret double %A
+}
+
+define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) {
+ %A = sub <2 x double> %arg1, %arg2
+ ret <2 x double> %A
+}
+
+define double @fmul(double %arg1, double %arg2) {
+ %A = mul double %arg1, %arg2
+ ret double %A
+}
+
+define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) {
+ %A = mul <2 x double> %arg1, %arg2
+ ret <2 x double> %A
+}
+
+define double @fma(double %arg1, double %arg2, double %arg3) {
+ %A = mul double %arg1, %arg2
+ %B = add double %A, %arg3
+ ret double %B
+}
+
+define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
+ %A = mul <2 x double> %arg1, %arg2
+ %B = add <2 x double> %A, %arg3
+ ret <2 x double> %B
+}
+
+define double @fms(double %arg1, double %arg2, double %arg3) {
+ %A = mul double %arg1, %arg2
+ %B = sub double %A, %arg3
+ ret double %B
+}
+
+define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
+ %A = mul <2 x double> %arg1, %arg2
+ %B = sub <2 x double> %A, %arg3
+ ret <2 x double> %B
+}
+
+; - (a * b - c)
+define double @d_fnms_1(double %arg1, double %arg2, double %arg3) {
+ %A = mul double %arg1, %arg2
+ %B = sub double %A, %arg3
+ %C = sub double -0.000000e+00, %B ; <double> [#uses=1]
+ ret double %C
+}
+
+; Another way of getting fnms
+; - ( a * b ) + c => c - (a * b)
+define double @d_fnms_2(double %arg1, double %arg2, double %arg3) {
+ %A = mul double %arg1, %arg2
+ %B = sub double %arg3, %A
+ ret double %B
+}
+
+; FNMS: - (a * b - c) => c - (a * b)
+define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
+ %A = mul <2 x double> %arg1, %arg2
+ %B = sub <2 x double> %arg3, %A ;
+ ret <2 x double> %B
+}
+
+; Another way to get fnms using a constant vector
+; - ( a * b - c)
+define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
+ %A = mul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1]
+ %B = sub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1]
+ %C = sub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B
+ ret <2 x double> %C
+}
+
+;define double @fdiv_1(double %arg1, double %arg2) {
+; %A = fdiv double %arg1, %arg2 ; <double> [#uses=1]
+; ret double %A
+;}
diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll
new file mode 100644
index 000000000000..540695677205
--- /dev/null
+++ b/test/CodeGen/CellSPU/eqv.ll
@@ -0,0 +1,152 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep eqv %t1.s | count 18
+; RUN: grep xshw %t1.s | count 6
+; RUN: grep xsbh %t1.s | count 3
+; RUN: grep andi %t1.s | count 3
+
+; Test the 'eqv' instruction, whose boolean expression is:
+; (a & b) | (~a & ~b), which simplifies to
+; (a & b) | ~(a | b)
+; Alternatively, a ^ ~b, which the compiler will also match.
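The identity above boils down to eqv(a, b) == ~(a ^ b). As an illustrative aside (not part of the patch; the function name is hypothetical), a reference form in IR:

; Reference form of eqv: ~(a ^ b) == (a & b) | (~a & ~b) == a ^ ~b
define i32 @eqv_reference(i32 %a, i32 %b) {
  %x = xor i32 %a, %b     ; a ^ b
  %r = xor i32 %x, -1     ; complement, giving bitwise equivalence
  ret i32 %r
}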
+
+; ModuleID = 'eqv.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = and <4 x i32> %arg1, %arg2
+ %B = or <4 x i32> %arg1, %arg2
+ %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %C = or <4 x i32> %A, %Bnot
+ ret <4 x i32> %C
+}
+
+define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
+ %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
+ %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
+ %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %C
+}
+
+define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
+ %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
+ %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
+ %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %C
+}
+
+define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %C = xor <4 x i32> %arg1, %arg2not
+ ret <4 x i32> %C
+}
+
+define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) {
+ %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %B = or i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %Bnot = xor i32 %B, -1 ; <i32> [#uses=1]
+ %C = or i32 %A, %Bnot ; <i32> [#uses=1]
+ ret i32 %C
+}
+
+define i32 @equiv_i32_2(i32 %arg1, i32 %arg2) {
+ %B = or i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %Bnot = xor i32 %B, -1 ; <i32> [#uses=1]
+ %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %C = or i32 %A, %Bnot ; <i32> [#uses=1]
+ ret i32 %C
+}
+
+define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) {
+ %B = or i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %Bnot = xor i32 %B, -1 ; <i32> [#uses=1]
+ %C = or i32 %A, %Bnot ; <i32> [#uses=1]
+ ret i32 %C
+}
+
+define i32 @equiv_i32_4(i32 %arg1, i32 %arg2) {
+ %arg2not = xor i32 %arg2, -1
+ %C = xor i32 %arg1, %arg2not
+ ret i32 %C
+}
+
+define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) {
+ %arg1not = xor i32 %arg1, -1
+ %C = xor i32 %arg2, %arg1not
+ ret i32 %C
+}
+
+define i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) signext {
+ %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
+ %B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
+ %Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
+ %C = or i16 %A, %Bnot ; <i16> [#uses=1]
+ ret i16 %C
+}
+
+define i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) signext {
+ %B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
+ %Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
+ %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
+ %C = or i16 %A, %Bnot ; <i16> [#uses=1]
+ ret i16 %C
+}
+
+define i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) signext {
+ %B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
+ %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
+ %Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
+ %C = or i16 %A, %Bnot ; <i16> [#uses=1]
+ ret i16 %C
+}
+
+define i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) signext {
+ %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
+ %C = or i8 %A, %Bnot ; <i8> [#uses=1]
+ ret i8 %C
+}
+
+define i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) signext {
+ %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
+ %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %C = or i8 %A, %Bnot ; <i8> [#uses=1]
+ ret i8 %C
+}
+
+define i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) signext {
+ %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
+ %C = or i8 %A, %Bnot ; <i8> [#uses=1]
+ ret i8 %C
+}
+
+define i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+ %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
+ %C = or i8 %A, %Bnot ; <i8> [#uses=1]
+ ret i8 %C
+}
+
+define i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+ %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
+ %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %C = or i8 %A, %Bnot ; <i8> [#uses=1]
+ ret i8 %C
+}
+
+define i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+ %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
+ %C = or i8 %A, %Bnot ; <i8> [#uses=1]
+ ret i8 %C
+}
diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll
new file mode 100644
index 000000000000..bcd2f42aa77e
--- /dev/null
+++ b/test/CodeGen/CellSPU/extract_elt.ll
@@ -0,0 +1,277 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep shufb %t1.s | count 39
+; RUN: grep ilhu %t1.s | count 27
+; RUN: grep iohl %t1.s | count 27
+; RUN: grep lqa %t1.s | count 10
+; RUN: grep shlqby %t1.s | count 12
+; RUN: grep 515 %t1.s | count 1
+; RUN: grep 1029 %t1.s | count 2
+; RUN: grep 1543 %t1.s | count 2
+; RUN: grep 2057 %t1.s | count 2
+; RUN: grep 2571 %t1.s | count 2
+; RUN: grep 3085 %t1.s | count 2
+; RUN: grep 3599 %t1.s | count 2
+; RUN: grep 32768 %t1.s | count 1
+; RUN: grep 32769 %t1.s | count 1
+; RUN: grep 32770 %t1.s | count 1
+; RUN: grep 32771 %t1.s | count 1
+; RUN: grep 32772 %t1.s | count 1
+; RUN: grep 32773 %t1.s | count 1
+; RUN: grep 32774 %t1.s | count 1
+; RUN: grep 32775 %t1.s | count 1
+; RUN: grep 32776 %t1.s | count 1
+; RUN: grep 32777 %t1.s | count 1
+; RUN: grep 32778 %t1.s | count 1
+; RUN: grep 32779 %t1.s | count 1
+; RUN: grep 32780 %t1.s | count 1
+; RUN: grep 32781 %t1.s | count 1
+; RUN: grep 32782 %t1.s | count 1
+; RUN: grep 32783 %t1.s | count 1
+; RUN: grep 32896 %t1.s | count 24
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define i32 @i32_extract_0(<4 x i32> %v) {
+entry:
+ %a = extractelement <4 x i32> %v, i32 0
+ ret i32 %a
+}
+
+define i32 @i32_extract_1(<4 x i32> %v) {
+entry:
+ %a = extractelement <4 x i32> %v, i32 1
+ ret i32 %a
+}
+
+define i32 @i32_extract_2(<4 x i32> %v) {
+entry:
+ %a = extractelement <4 x i32> %v, i32 2
+ ret i32 %a
+}
+
+define i32 @i32_extract_3(<4 x i32> %v) {
+entry:
+ %a = extractelement <4 x i32> %v, i32 3
+ ret i32 %a
+}
+
+define i16 @i16_extract_0(<8 x i16> %v) {
+entry:
+ %a = extractelement <8 x i16> %v, i32 0
+ ret i16 %a
+}
+
+define i16 @i16_extract_1(<8 x i16> %v) {
+entry:
+ %a = extractelement <8 x i16> %v, i32 1
+ ret i16 %a
+}
+
+define i16 @i16_extract_2(<8 x i16> %v) {
+entry:
+ %a = extractelement <8 x i16> %v, i32 2
+ ret i16 %a
+}
+
+define i16 @i16_extract_3(<8 x i16> %v) {
+entry:
+ %a = extractelement <8 x i16> %v, i32 3
+ ret i16 %a
+}
+
+define i16 @i16_extract_4(<8 x i16> %v) {
+entry:
+ %a = extractelement <8 x i16> %v, i32 4
+ ret i16 %a
+}
+
+define i16 @i16_extract_5(<8 x i16> %v) {
+entry:
+ %a = extractelement <8 x i16> %v, i32 5
+ ret i16 %a
+}
+
+define i16 @i16_extract_6(<8 x i16> %v) {
+entry:
+ %a = extractelement <8 x i16> %v, i32 6
+ ret i16 %a
+}
+
+define i16 @i16_extract_7(<8 x i16> %v) {
+entry:
+ %a = extractelement <8 x i16> %v, i32 7
+ ret i16 %a
+}
+
+define i8 @i8_extract_0(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 0
+ ret i8 %a
+}
+
+define i8 @i8_extract_1(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 1
+ ret i8 %a
+}
+
+define i8 @i8_extract_2(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 2
+ ret i8 %a
+}
+
+define i8 @i8_extract_3(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 3
+ ret i8 %a
+}
+
+define i8 @i8_extract_4(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 4
+ ret i8 %a
+}
+
+define i8 @i8_extract_5(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 5
+ ret i8 %a
+}
+
+define i8 @i8_extract_6(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 6
+ ret i8 %a
+}
+
+define i8 @i8_extract_7(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 7
+ ret i8 %a
+}
+
+define i8 @i8_extract_8(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 8
+ ret i8 %a
+}
+
+define i8 @i8_extract_9(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 9
+ ret i8 %a
+}
+
+define i8 @i8_extract_10(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 10
+ ret i8 %a
+}
+
+define i8 @i8_extract_11(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 11
+ ret i8 %a
+}
+
+define i8 @i8_extract_12(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 12
+ ret i8 %a
+}
+
+define i8 @i8_extract_13(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 13
+ ret i8 %a
+}
+
+define i8 @i8_extract_14(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 14
+ ret i8 %a
+}
+
+define i8 @i8_extract_15(<16 x i8> %v) {
+entry:
+ %a = extractelement <16 x i8> %v, i32 15
+ ret i8 %a
+}
+
+;;--------------------------------------------------------------------------
+;; extract element, variable index:
+;;--------------------------------------------------------------------------
+
+define i8 @extract_varadic_i8(i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <16 x i8> < i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i32 %i
+ ret i8 %0
+}
+
+define i8 @extract_varadic_i8_1(<16 x i8> %v, i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <16 x i8> %v, i32 %i
+ ret i8 %0
+}
+
+define i16 @extract_varadic_i16(i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <8 x i16> < i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i32 %i
+ ret i16 %0
+}
+
+define i16 @extract_varadic_i16_1(<8 x i16> %v, i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <8 x i16> %v, i32 %i
+ ret i16 %0
+}
+
+define i32 @extract_varadic_i32(i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <4 x i32> < i32 0, i32 1, i32 2, i32 3>, i32 %i
+ ret i32 %0
+}
+
+define i32 @extract_varadic_i32_1(<4 x i32> %v, i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <4 x i32> %v, i32 %i
+ ret i32 %0
+}
+
+define float @extract_varadic_f32(i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, i32 %i
+ ret float %0
+}
+
+define float @extract_varadic_f32_1(<4 x float> %v, i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <4 x float> %v, i32 %i
+ ret float %0
+}
+
+define i64 @extract_varadic_i64(i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <2 x i64> < i64 0, i64 1>, i32 %i
+ ret i64 %0
+}
+
+define i64 @extract_varadic_i64_1(<2 x i64> %v, i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <2 x i64> %v, i32 %i
+ ret i64 %0
+}
+
+define double @extract_varadic_f64(i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <2 x double> < double 1.000000e+00, double 2.000000e+00>, i32 %i
+ ret double %0
+}
+
+define double @extract_varadic_f64_1(<2 x double> %v, i32 %i) nounwind readnone {
+entry:
+ %0 = extractelement <2 x double> %v, i32 %i
+ ret double %0
+}
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
new file mode 100644
index 000000000000..27a659e82930
--- /dev/null
+++ b/test/CodeGen/CellSPU/fcmp32.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep fceq %t1.s | count 1
+; RUN: grep fcmeq %t1.s | count 1
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; Exercise the floating point comparison operators for f32:
+
+declare double @fabs(double)
+declare float @fabsf(float)
+
+define i1 @fcmp_eq(float %arg1, float %arg2) {
+ %A = fcmp oeq float %arg1, %arg2
+ ret i1 %A
+}
+
+define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
+ %1 = call float @fabsf(float %arg1)
+ %2 = call float @fabsf(float %arg2)
+ %3 = fcmp oeq float %1, %2
+ ret i1 %3
+}
diff --git a/test/CodeGen/CellSPU/fcmp64.ll b/test/CodeGen/CellSPU/fcmp64.ll
new file mode 100644
index 000000000000..1906bfe7ddaa
--- /dev/null
+++ b/test/CodeGen/CellSPU/fcmp64.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+
+define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind {
+entry:
+ %A = fcmp oeq double %arg1, %arg2
+ ret i1 %A
+}
diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll
new file mode 100644
index 000000000000..d121c3f8c907
--- /dev/null
+++ b/test/CodeGen/CellSPU/fdiv.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep frest %t1.s | count 2
+; RUN: grep -w fi %t1.s | count 2
+; RUN: grep -w fm %t1.s | count 2
+; RUN: grep fma %t1.s | count 2
+; RUN: grep fnms %t1.s | count 4
+; RUN: grep cgti %t1.s | count 2
+; RUN: grep selb %t1.s | count 2
+;
+; This file includes standard floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define float @fdiv32(float %arg1, float %arg2) {
+ %A = fdiv float %arg1, %arg2
+ ret float %A
+}
+
+define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
+ %A = fdiv <4 x float> %arg1, %arg2
+ ret <4 x float> %A
+}
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
new file mode 100644
index 000000000000..4c6fbb95a39f
--- /dev/null
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -0,0 +1,42 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep 32768 %t1.s | count 2
+; RUN: grep xor %t1.s | count 4
+; RUN: grep and %t1.s | count 2
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define double @fneg_dp(double %X) {
+ %Y = sub double -0.000000e+00, %X
+ ret double %Y
+}
+
+define <2 x double> @fneg_dp_vec(<2 x double> %X) {
+ %Y = sub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X
+ ret <2 x double> %Y
+}
+
+define float @fneg_sp(float %X) {
+ %Y = sub float -0.000000e+00, %X
+ ret float %Y
+}
+
+define <4 x float> @fneg_sp_vec(<4 x float> %X) {
+ %Y = sub <4 x float> <float -0.000000e+00, float -0.000000e+00,
+ float -0.000000e+00, float -0.000000e+00>, %X
+ ret <4 x float> %Y
+}
+
+declare double @fabs(double)
+
+declare float @fabsf(float)
+
+define double @fabs_dp(double %X) {
+ %Y = call double @fabs( double %X )
+ ret double %Y
+}
+
+define float @fabs_sp(float %X) {
+ %Y = call float @fabsf( float %X )
+ ret float %Y
+}
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
new file mode 100644
index 000000000000..dd6782772a5d
--- /dev/null
+++ b/test/CodeGen/CellSPU/i64ops.ll
@@ -0,0 +1,57 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep xswd %t1.s | count 3
+; RUN: grep xsbh %t1.s | count 1
+; RUN: grep xshw %t1.s | count 2
+; RUN: grep shufb %t1.s | count 7
+; RUN: grep cg %t1.s | count 4
+; RUN: grep addx %t1.s | count 4
+; RUN: grep fsmbi %t1.s | count 3
+; RUN: grep il %t1.s | count 2
+; RUN: grep mpy %t1.s | count 10
+; RUN: grep mpyh %t1.s | count 6
+; RUN: grep mpyhhu %t1.s | count 2
+; RUN: grep mpyu %t1.s | count 4
+
+; ModuleID = 'stores.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define i64 @sext_i64_i8(i8 %a) nounwind {
+ %1 = sext i8 %a to i64
+ ret i64 %1
+}
+
+define i64 @sext_i64_i16(i16 %a) nounwind {
+ %1 = sext i16 %a to i64
+ ret i64 %1
+}
+
+define i64 @sext_i64_i32(i32 %a) nounwind {
+ %1 = sext i32 %a to i64
+ ret i64 %1
+}
+
+define i64 @zext_i64_i8(i8 %a) nounwind {
+ %1 = zext i8 %a to i64
+ ret i64 %1
+}
+
+define i64 @zext_i64_i16(i16 %a) nounwind {
+ %1 = zext i16 %a to i64
+ ret i64 %1
+}
+
+define i64 @zext_i64_i32(i32 %a) nounwind {
+ %1 = zext i32 %a to i64
+ ret i64 %1
+}
+
+define i64 @add_i64(i64 %a, i64 %b) nounwind {
+ %1 = add i64 %a, %b
+ ret i64 %1
+}
+
+define i64 @mul_i64(i64 %a, i64 %b) nounwind {
+ %1 = mul i64 %a, %b
+ ret i64 %1
+}
diff --git a/test/CodeGen/CellSPU/i8ops.ll b/test/CodeGen/CellSPU/i8ops.ll
new file mode 100644
index 000000000000..23a036e37443
--- /dev/null
+++ b/test/CodeGen/CellSPU/i8ops.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+
+; ModuleID = 'i8ops.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define i8 @add_i8(i8 %a, i8 %b) nounwind {
+ %1 = add i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @add_i8_imm(i8 %a, i8 %b) nounwind {
+ %1 = add i8 %a, 15
+ ret i8 %1
+}
+
+define i8 @sub_i8(i8 %a, i8 %b) nounwind {
+ %1 = sub i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @sub_i8_imm(i8 %a, i8 %b) nounwind {
+ %1 = sub i8 %a, 15
+ ret i8 %1
+}
diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll
new file mode 100644
index 000000000000..56d1b8fb41b2
--- /dev/null
+++ b/test/CodeGen/CellSPU/icmp16.ll
@@ -0,0 +1,350 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ilh %t1.s | count 15
+; RUN: grep ceqh %t1.s | count 29
+; RUN: grep ceqhi %t1.s | count 13
+; RUN: grep clgth %t1.s | count 15
+; RUN: grep cgth %t1.s | count 14
+; RUN: grep cgthi %t1.s | count 6
+; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7
+; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3
+; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 17
+; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 6
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
+; $3 = %arg1, $4 = %val1, $5 = %val2
+;
+; For "positive" comparisons:
+; selb $3, $6, $5, <i1>
+; selb $3, $5, $4, <i1>
+;
+; For "negative" comparisons, i.e., those where the result of the comparison
+; must be inverted (setne, for example):
+; selb $3, $5, $6, <i1>
+; selb $3, $4, $5, <i1>
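For illustration only (not part of the patch; names are hypothetical), the operand swap in the "negative" cases follows from selecting on the inverted condition with the values exchanged:

; select (a != b), v1, v2 yields the same value as select (a == b), v2, v1,
; which is why the setne tests below expect selb with swapped operands.
define i16 @ne_select_as_swapped_eq(i16 %a, i16 %b, i16 %v1, i16 %v2) {
  %eq = icmp eq i16 %a, %b
  %r = select i1 %eq, i16 %v2, i16 %v1
  ret i16 %r
}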
+
+; i16 integer comparisons:
+define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp eq i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp eq i16 %arg1, %arg2
+ ret i1 %A
+}
+
+define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp eq i16 %arg1, 511
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp eq i16 %arg1, -512
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp eq i16 %arg1, -1
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp eq i16 %arg1, 32768
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ne i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ne i16 %arg1, %arg2
+ ret i1 %A
+}
+
+define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ne i16 %arg1, 511
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ne i16 %arg1, -512
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ne i16 %arg1, -1
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ne i16 %arg1, 32768
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ugt i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ugt i16 %arg1, %arg2
+ ret i1 %A
+}
+
+define i16 @icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ugt i16 %arg1, 500
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ugt i16 %arg1, 0
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ugt i16 %arg1, 65024
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ugt i16 %arg1, 32768
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp uge i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp uge i16 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp uge i16 %arg1, <immed> can always be transformed into
+;; icmp ugt i16 %arg1, <immed>-1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
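A minimal, hypothetical example of this rewrite (not part of the patch):

; These two functions compute the same predicate:
define i1 @uge_form(i16 %x) {
  %a = icmp uge i16 %x, 500
  ret i1 %a
}
define i1 @ugt_form(i16 %x) {
  %a = icmp ugt i16 %x, 499
  ret i1 %a
}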
+
+define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ult i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ult i16 %arg1, %arg2
+ ret i1 %A
+}
+
+define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ult i16 %arg1, 511
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ult i16 %arg1, 65534
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ult i16 %arg1, 65024
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ult i16 %arg1, 32769
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ule i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_ule_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp ule i16 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp ule i16 %arg1, <immed> can always be transformed into
+;; icmp ult i16 %arg1, <immed>+1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
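A hypothetical one-function sketch of the rewrite (not part of the patch):

; icmp ule i16 %x, 500 computes the same predicate as:
define i1 @ule_as_ult(i16 %x) {
  %a = icmp ult i16 %x, 501
  ret i1 %a
}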
+
+define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sgt i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sgt i16 %arg1, %arg2
+ ret i1 %A
+}
+
+define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sgt i16 %arg1, 511
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sgt i16 %arg1, -1
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sgt i16 %arg1, -512
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sgt i16 %arg1, 32768
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sge i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sge i16 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp sge i16 %arg1, <immed> can always be transformed into
+;; icmp sgt i16 %arg1, <immed>-1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
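Likewise, a hypothetical sketch of the sge rewrite (not part of the patch):

; icmp sge i16 %x, 500 computes the same predicate as:
define i1 @sge_as_sgt(i16 %x) {
  %a = icmp sgt i16 %x, 499
  ret i1 %a
}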
+
+define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp slt i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp slt i16 %arg1, %arg2
+ ret i1 %A
+}
+
+define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp slt i16 %arg1, 511
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp slt i16 %arg1, -512
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp slt i16 %arg1, -1
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_slt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp slt i16 %arg1, 32768
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sle i16 %arg1, %arg2
+ %B = select i1 %A, i16 %val1, i16 %val2
+ ret i16 %B
+}
+
+define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+entry:
+ %A = icmp sle i16 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp sle i16 %arg1, <immed> can always be transformed into
+;; icmp slt i16 %arg1, <immed>+1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
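And for the sle case, a hypothetical sketch (not part of the patch):

; icmp sle i16 %x, 500 computes the same predicate as:
define i1 @sle_as_slt(i16 %x) {
  %a = icmp slt i16 %x, 501
  ret i1 %a
}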
+
diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll
new file mode 100644
index 000000000000..4f74b0dd0429
--- /dev/null
+++ b/test/CodeGen/CellSPU/icmp32.ll
@@ -0,0 +1,350 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ila %t1.s | count 6
+; RUN: grep ceq %t1.s | count 28
+; RUN: grep ceqi %t1.s | count 12
+; RUN: grep clgt %t1.s | count 16
+; RUN: grep clgti %t1.s | count 6
+; RUN: grep cgt %t1.s | count 16
+; RUN: grep cgti %t1.s | count 6
+; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7
+; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3
+; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 20
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
+; $3 = %arg1, $4 = %val1, $5 = %val2
+;
+; For "positive" comparisons:
+; selb $3, $6, $5, <i1>
+; selb $3, $5, $4, <i1>
+;
+; For "negative" comparisons, i.e., those where the result of the comparison
+; must be inverted (setne, for example):
+; selb $3, $5, $6, <i1>
+; selb $3, $4, $5, <i1>
+
+; i32 integer comparisons:
+define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp eq i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp eq i32 %arg1, %arg2
+ ret i1 %A
+}
+
+define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp eq i32 %arg1, 511
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp eq i32 %arg1, -512
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp eq i32 %arg1, -1
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp eq i32 %arg1, 32768
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ne i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ne i32 %arg1, %arg2
+ ret i1 %A
+}
+
+define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ne i32 %arg1, 511
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ne i32 %arg1, -512
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ne i32 %arg1, -1
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ne i32 %arg1, 32768
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ugt i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ugt i32 %arg1, %arg2
+ ret i1 %A
+}
+
+define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ugt i32 %arg1, 511
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ugt i32 %arg1, 4294966784
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ugt i32 %arg1, 4294967293
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ugt i32 %arg1, 32768
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp uge i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp uge i32 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp uge i32 %arg1, <immed> can always be transformed into
+;; icmp ugt i32 %arg1, <immed>-1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
+
+define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ult i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ult i32 %arg1, %arg2
+ ret i1 %A
+}
+
+define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ult i32 %arg1, 511
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ult i32 %arg1, 4294966784
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ult i32 %arg1, 4294967293
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ult i32 %arg1, 32768
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ule i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_ule_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp ule i32 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp ule i32 %arg1, <immed> can always be transformed into
+;; icmp ult i32 %arg1, <immed>+1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
+
+define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sgt i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sgt i32 %arg1, %arg2
+ ret i1 %A
+}
+
+define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sgt i32 %arg1, 511
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sgt i32 %arg1, 4294966784
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sgt i32 %arg1, 4294967293
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sgt i32 %arg1, 32768
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sge i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sge i32 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp sge i32 %arg1, <immed> can always be transformed into
+;; icmp sgt i32 %arg1, <immed>-1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
+
+define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp slt i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp slt i32 %arg1, %arg2
+ ret i1 %A
+}
+
+define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp slt i32 %arg1, 511
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp slt i32 %arg1, -512
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp slt i32 %arg1, -1
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp slt i32 %arg1, 32768
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sle i32 %arg1, %arg2
+ %B = select i1 %A, i32 %val1, i32 %val2
+ ret i32 %B
+}
+
+define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+entry:
+ %A = icmp sle i32 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp sle i32 %arg1, <immed> can always be transformed into
+;; icmp slt i32 %arg1, <immed>+1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
+
diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll
new file mode 100644
index 000000000000..b26252cedb30
--- /dev/null
+++ b/test/CodeGen/CellSPU/icmp64.ll
@@ -0,0 +1,146 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ceq %t1.s | count 20
+; RUN: grep cgti %t1.s | count 12
+; RUN: grep cgt %t1.s | count 16
+; RUN: grep clgt %t1.s | count 12
+; RUN: grep gb %t1.s | count 12
+; RUN: grep fsm %t1.s | count 10
+; RUN: grep xori %t1.s | count 5
+; RUN: grep selb %t1.s | count 18
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
+; $3 = %arg1, $4 = %val1, $5 = %val2
+;
+; i64 integer comparisons:
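+;
+; SPU has no 64-bit compare instruction; judging from the RUN counts above,
+; each i64 comparison is expected to be synthesized from word-wise compares
+; (ceq/cgt/clgt), a gb to gather the per-word results, fsm to rebuild a
+; full-width select mask, xori to invert the negated forms, and selb for the
+; final select. (Descriptive note only; the exact sequence is not checked.)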
+define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp eq i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp eq i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ne i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ne i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ugt i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ugt i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp uge i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp uge i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ult i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ult i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ule i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ule i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp sgt i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp sgt i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp sge i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp sge i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp slt i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp slt i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp sle i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp sle i64 %arg1, %arg2
+ ret i1 %A
+}
diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll
new file mode 100644
index 000000000000..d246481f03a1
--- /dev/null
+++ b/test/CodeGen/CellSPU/icmp8.ll
@@ -0,0 +1,286 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ceqb %t1.s | count 24
+; RUN: grep ceqbi %t1.s | count 12
+; RUN: grep clgtb %t1.s | count 11
+; RUN: grep cgtb %t1.s | count 13
+; RUN: grep cgtbi %t1.s | count 5
+; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7
+; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3
+; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 11
+; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 4
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
+; $3 = %arg1, $4 = %val1, $5 = %val2
+;
+; For "positive" comparisons:
+; selb $3, $6, $5, <i1>
+; selb $3, $5, $4, <i1>
+;
+; For "negative" comparisons, i.e., those where the result of the comparison
+; must be inverted (setne, for example):
+; selb $3, $5, $6, <i1>
+; selb $3, $4, $5, <i1>
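+;
+; For example, icmp_eq_select_i8 below should reduce to roughly the following
+; (illustrative only; the RUN lines check the selb operand order, not the
+; full sequence, and register choices may differ):
+;
+;   ceqb  $3, $3, $4        ; byte-wise compare %arg1, %arg2 -> mask
+;   selb  $3, $6, $5, $3    ; mask ? %val1 : %val2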
+
+; i8 integer comparisons:
+define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp eq i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp eq i8 %arg1, %arg2
+ ret i1 %A
+}
+
+define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp eq i8 %arg1, 127
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp eq i8 %arg1, -128
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp eq i8 %arg1, -1
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ne i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ne i8 %arg1, %arg2
+ ret i1 %A
+}
+
+define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ne i8 %arg1, 127
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ne i8 %arg1, -128
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ne i8 %arg1, -1
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ugt i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ugt i8 %arg1, %arg2
+ ret i1 %A
+}
+
+define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ugt i8 %arg1, 126
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp uge i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp uge i8 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp uge i8 %arg1, <immed> can always be transformed into
+;; icmp ugt i8 %arg1, <immed>-1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
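+;;
+;; e.g. (illustrative only): "icmp uge i8 %arg1, 129" reaches the backend as
+;; "icmp ugt i8 %arg1, 128", so only the ugt immediate patterns are exercised.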
+
+define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ult i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ult i8 %arg1, %arg2
+ ret i1 %A
+}
+
+define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ult i8 %arg1, 253
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ult i8 %arg1, 129
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ule i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp ule i8 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp ule i8 %arg1, <immed> can always be transformed into
+;; icmp ult i8 %arg1, <immed>+1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
+
+define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp sgt i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp sgt i8 %arg1, %arg2
+ ret i1 %A
+}
+
+define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp sgt i8 %arg1, 96
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp sgt i8 %arg1, -1
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp sgt i8 %arg1, -128
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp sge i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp sge i8 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp sge i8 %arg1, <immed> can always be transformed into
+;; icmp sgt i8 %arg1, <immed>-1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
+
+define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp slt i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp slt i8 %arg1, %arg2
+ ret i1 %A
+}
+
+define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp slt i8 %arg1, 96
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp slt i8 %arg1, -120
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp slt i8 %arg1, -1
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp sle i8 %arg1, %arg2
+ %B = select i1 %A, i8 %val1, i8 %val2
+ ret i8 %B
+}
+
+define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+entry:
+ %A = icmp sle i8 %arg1, %arg2
+ ret i1 %A
+}
+
+;; Note: icmp sle i8 %arg1, <immed> can always be transformed into
+;; icmp slt i8 %arg1, <immed>+1
+;;
+;; Consequently, even though the patterns exist to match, it's unlikely
+;; they'll ever be generated.
+
diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll
new file mode 100644
index 000000000000..9a461cbb85a6
--- /dev/null
+++ b/test/CodeGen/CellSPU/immed16.ll
@@ -0,0 +1,40 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep "ilh" %t1.s | count 11
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define i16 @test_1() {
+ %x = alloca i16, align 16
+ store i16 419, i16* %x ;; ILH via pattern
+ ret i16 0
+}
+
+define i16 @test_2() {
+ %x = alloca i16, align 16
+ store i16 1023, i16* %x ;; ILH via pattern
+ ret i16 0
+}
+
+define i16 @test_3() {
+ %x = alloca i16, align 16
+ store i16 -1023, i16* %x ;; ILH via pattern
+ ret i16 0
+}
+
+define i16 @test_4() {
+ %x = alloca i16, align 16
+ store i16 32767, i16* %x ;; ILH via pattern
+ ret i16 0
+}
+
+define i16 @test_5() {
+ %x = alloca i16, align 16
+ store i16 -32768, i16* %x ;; ILH via pattern
+ ret i16 0
+}
+
+define i16 @test_6() {
+ ret i16 0
+}
+
+
diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll
new file mode 100644
index 000000000000..bf471b1eb1ce
--- /dev/null
+++ b/test/CodeGen/CellSPU/immed32.ll
@@ -0,0 +1,72 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ilhu %t1.s | count 8
+; RUN: grep iohl %t1.s | count 6
+; RUN: grep -w il %t1.s | count 3
+; RUN: grep 16429 %t1.s | count 1
+; RUN: grep 63572 %t1.s | count 1
+; RUN: grep 128 %t1.s | count 1
+; RUN: grep 32639 %t1.s | count 1
+; RUN: grep 65535 %t1.s | count 1
+; RUN: grep 16457 %t1.s | count 1
+; RUN: grep 4059 %t1.s | count 1
+; RUN: grep 49077 %t1.s | count 1
+; RUN: grep 1267 %t1.s | count 2
+; RUN: grep 16309 %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define i32 @test_1() {
+	ret i32 4784128              ;; ILHU via pattern (0x490000)
+}
+
+define i32 @test_2() {
+	ret i32 5308431              ;; ILHU/IOHL via pattern (0x51000f)
+}
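+
+;; ILHU ("immediate load halfword upper") sets the upper 16 bits of each word
+;; and zeroes the lower 16; IOHL ("immediate or halfword lower") then ORs in
+;; the low 16 bits. A sketch of the expected pair for test_2 above (register
+;; number is illustrative, not checked):
+;;   ilhu  $3, 81        ; 0x0051 << 16 = 0x00510000
+;;   iohl  $3, 15        ; | 0x000f     = 0x0051000f = 5308431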
+
+define i32 @test_3() {
+ ret i32 511 ;; IL via pattern
+}
+
+define i32 @test_4() {
+ ret i32 -512 ;; IL via pattern
+}
+
+;; double float floatval
+;; 0x4005bf0a80000000 0x402d|f854 2.718282
+define float @float_const_1() {
+ ret float 0x4005BF0A80000000 ;; ILHU/IOHL
+}
+
+;; double float floatval
+;; 0x3810000000000000 0x0080|0000 0.000000
+define float @float_const_2() {
+ ret float 0x3810000000000000 ;; IL 128
+}
+
+;; double float floatval
+;; 0x47efffffe0000000      0x7f7f|ffff     3.402823e+38 (FLT_MAX)
+define float @float_const_3() {
+ ret float 0x47EFFFFFE0000000 ;; ILHU/IOHL via pattern
+}
+
+;; double float floatval
+;; 0x400921fb60000000 0x4049|0fdb 3.141593
+define float @float_const_4() {
+ ret float 0x400921FB60000000 ;; ILHU/IOHL via pattern
+}
+
+;; double float floatval
+;; 0xbff6a09e60000000 0xbfb5|04f3 -1.414214
+define float @float_const_5() {
+ ret float 0xBFF6A09E60000000 ;; ILHU/IOHL via pattern
+}
+
+;; double float floatval
+;; 0x3ff6a09e60000000 0x3fb5|04f3 1.414214
+define float @float_const_6() {
+ ret float 0x3FF6A09E60000000 ;; ILHU/IOHL via pattern
+}
+
+define float @float_const_7() {
+ ret float 0.000000e+00 ;; IL 0 via pattern
+}
diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll
new file mode 100644
index 000000000000..bbda3ff329cb
--- /dev/null
+++ b/test/CodeGen/CellSPU/immed64.ll
@@ -0,0 +1,95 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep lqa %t1.s | count 13
+; RUN: grep ilhu %t1.s | count 15
+; RUN: grep ila %t1.s | count 1
+; RUN: grep -w il %t1.s | count 6
+; RUN: grep shufb %t1.s | count 13
+; RUN: grep 65520 %t1.s | count 1
+; RUN: grep 43981 %t1.s | count 1
+; RUN: grep 13702 %t1.s | count 1
+; RUN: grep 28225 %t1.s | count 1
+; RUN: grep 30720 %t1.s | count 1
+; RUN: grep 3233857728 %t1.s | count 8
+; RUN: grep 2155905152 %t1.s | count 6
+; RUN: grep 66051 %t1.s | count 7
+; RUN: grep 471670303 %t1.s | count 11
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202)
+; 18446744073709551591 => 0x ffffffff ffffffe7 (-25)
+; 18446744073708516742 => 0x ffffffff fff03586 (-1034874)
+; 5308431 => 0x 00000000 0051000F
+; 9223372038704560128 => 0x 80000000 6e417800
+
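+; With no 64-bit immediate form, a non-trivial i64 constant is expected to be
+; built either by loading it from the constant pool (lqa) or by materializing
+; a 32-bit piece with il/ilhu+iohl and then positioning/replicating it into
+; the doubleword with shufb, which is what the RUN counts above check for.
+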
+define i64 @i64_const_1() {
+ ret i64 1311768467750121234 ;; Constant pool spill
+}
+
+define i64 @i64_const_2() {
+ ret i64 18446744073709551591 ;; IL/SHUFB
+}
+
+define i64 @i64_const_3() {
+  ret i64 18446744073708516742 ;; ILHU/IOHL/SHUFB
+}
+
+define i64 @i64_const_4() {
+ ret i64 5308431 ;; ILHU/IOHL/SHUFB
+}
+
+define i64 @i64_const_5() {
+ ret i64 511 ;; IL/SHUFB
+}
+
+define i64 @i64_const_6() {
+ ret i64 -512 ;; IL/SHUFB
+}
+
+define i64 @i64_const_7() {
+  ret i64 9223372038704560128 ;; ILHU/IOHL/SHUFB
+}
+
+define i64 @i64_const_8() {
+ ret i64 0 ;; IL
+}
+
+define i64 @i64_const_9() {
+ ret i64 -1 ;; IL
+}
+
+define i64 @i64_const_10() {
+ ret i64 281470681808895 ;; IL 65535
+}
+
+; 0x4005bf0a8b145769 ->
+; (ILHU 0x4005 [16389]/IOHL 0xbf0a [48906])
+; (ILHU 0x8b14 [35604]/IOHL 0x5769 [22377])
+define double @f64_const_1() {
+ ret double 0x4005bf0a8b145769 ;; ILHU/IOHL via pattern
+}
+
+define double @f64_const_2() {
+ ret double 0x0010000000000000
+}
+
+define double @f64_const_3() {
+ ret double 0x7fefffffffffffff
+}
+
+define double @f64_const_4() {
+ ret double 0x400921fb54442d18
+}
+
+define double @f64_const_5() {
+ ret double 0xbff6a09e667f3bcd ;; ILHU/IOHL via pattern
+}
+
+define double @f64_const_6() {
+ ret double 0x3ff6a09e667f3bcd
+}
+
+define double @f64_const_7() {
+ ret double 0.000000e+00
+}
diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll
new file mode 100644
index 000000000000..ee3076594ad6
--- /dev/null
+++ b/test/CodeGen/CellSPU/int2fp.ll
@@ -0,0 +1,41 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep csflt %t1.s | count 5
+; RUN: grep cuflt %t1.s | count 1
+; RUN: grep xshw %t1.s | count 2
+; RUN: grep xsbh %t1.s | count 1
+; RUN: grep and %t1.s | count 2
+; RUN: grep andi %t1.s | count 1
+; RUN: grep ila %t1.s | count 1
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define float @sitofp_i32(i32 %arg1) {
+ %A = sitofp i32 %arg1 to float ; <float> [#uses=1]
+ ret float %A
+}
+
+define float @uitofp_u32(i32 %arg1) {
+ %A = uitofp i32 %arg1 to float ; <float> [#uses=1]
+ ret float %A
+}
+
+define float @sitofp_i16(i16 %arg1) {
+ %A = sitofp i16 %arg1 to float ; <float> [#uses=1]
+ ret float %A
+}
+
+define float @uitofp_i16(i16 %arg1) {
+ %A = uitofp i16 %arg1 to float ; <float> [#uses=1]
+ ret float %A
+}
+
+define float @sitofp_i8(i8 %arg1) {
+ %A = sitofp i8 %arg1 to float ; <float> [#uses=1]
+ ret float %A
+}
+
+define float @uitofp_i8(i8 %arg1) {
+ %A = uitofp i8 %arg1 to float ; <float> [#uses=1]
+ ret float %A
+}
diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll
new file mode 100644
index 000000000000..87ad18211a25
--- /dev/null
+++ b/test/CodeGen/CellSPU/intrinsics_branch.ll
@@ -0,0 +1,150 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ceq %t1.s | count 30
+; RUN: grep ceqb %t1.s | count 10
+; RUN: grep ceqhi %t1.s | count 5
+; RUN: grep ceqi %t1.s | count 5
+; RUN: grep cgt %t1.s | count 30
+; RUN: grep cgtb %t1.s | count 10
+; RUN: grep cgthi %t1.s | count 5
+; RUN: grep cgti %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8)
+
+
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @ceqitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll
new file mode 100644
index 000000000000..c18f8deb385e
--- /dev/null
+++ b/test/CodeGen/CellSPU/intrinsics_float.ll
@@ -0,0 +1,94 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep fa %t1.s | count 5
+; RUN: grep fs %t1.s | count 5
+; RUN: grep fm %t1.s | count 15
+; RUN: grep fceq %t1.s | count 5
+; RUN: grep fcmeq %t1.s | count 5
+; RUN: grep fcgt %t1.s | count 5
+; RUN: grep fcmgt %t1.s | count 5
+; RUN: grep fma %t1.s | count 5
+; RUN: grep fnms %t1.s | count 5
+; RUN: grep fms %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>)
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll
new file mode 100644
index 000000000000..843340b74542
--- /dev/null
+++ b/test/CodeGen/CellSPU/intrinsics_logical.ll
@@ -0,0 +1,49 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep and %t1.s | count 20
+; RUN: grep andc %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @anditest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @andhitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
new file mode 100644
index 000000000000..3b9746c8080a
--- /dev/null
+++ b/test/CodeGen/CellSPU/loads.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep {lqd.*0(\$3)} %t1.s | count 1
+; RUN: grep {lqd.*16(\$3)} %t1.s | count 1
+
+; ModuleID = 'loads.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly {
+entry:
+ %tmp1 = load <4 x float>* %a
+ ret <4 x float> %tmp1
+}
+
+define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly {
+entry:
+ %arrayidx = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1]
+ %tmp1 = load <4 x float>* %arrayidx ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp1
+}
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll
new file mode 100644
index 000000000000..085ce555dc25
--- /dev/null
+++ b/test/CodeGen/CellSPU/mul_ops.ll
@@ -0,0 +1,89 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep mpy %t1.s | count 44
+; RUN: grep mpyu %t1.s | count 4
+; RUN: grep mpyh %t1.s | count 10
+; RUN: grep mpyhh %t1.s | count 2
+; RUN: grep rotma %t1.s | count 12
+; RUN: grep rotmahi %t1.s | count 4
+; RUN: grep and %t1.s | count 2
+; RUN: grep selb %t1.s | count 6
+; RUN: grep fsmbi %t1.s | count 4
+; RUN: grep shli %t1.s | count 4
+; RUN: grep shlhi %t1.s | count 4
+; RUN: grep ila %t1.s | count 2
+; RUN: grep xsbh %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; 32-bit multiply instruction generation:
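+; Background note (not checked by the RUN lines): SPU multiplies operate on
+; 16-bit halfwords (mpy/mpyu/mpyh), so a full 32-bit product is expected to
+; be assembled from partial products, roughly
+;   a*b = ((a_hi*b_lo + b_hi*a_lo) << 16) + a_lo*b_lo   (mod 2^32)
+; which is why mpyh and mpyu appear in the counts above.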
+define <4 x i32> @mpy_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
+entry:
+ %A = mul <4 x i32> %arg1, %arg2
+ ret <4 x i32> %A
+}
+
+define <4 x i32> @mpy_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
+entry:
+ %A = mul <4 x i32> %arg2, %arg1
+ ret <4 x i32> %A
+}
+
+define <8 x i16> @mpy_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
+entry:
+ %A = mul <8 x i16> %arg1, %arg2
+ ret <8 x i16> %A
+}
+
+define <8 x i16> @mpy_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
+entry:
+ %A = mul <8 x i16> %arg2, %arg1
+ ret <8 x i16> %A
+}
+
+define <16 x i8> @mul_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
+entry:
+ %A = mul <16 x i8> %arg2, %arg1
+ ret <16 x i8> %A
+}
+
+define <16 x i8> @mul_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
+entry:
+ %A = mul <16 x i8> %arg1, %arg2
+ ret <16 x i8> %A
+}
+
+define i32 @mul_i32_1(i32 %arg1, i32 %arg2) {
+entry:
+ %A = mul i32 %arg2, %arg1
+ ret i32 %A
+}
+
+define i32 @mul_i32_2(i32 %arg1, i32 %arg2) {
+entry:
+ %A = mul i32 %arg1, %arg2
+ ret i32 %A
+}
+
+define i16 @mul_i16_1(i16 %arg1, i16 %arg2) {
+entry:
+ %A = mul i16 %arg2, %arg1
+ ret i16 %A
+}
+
+define i16 @mul_i16_2(i16 %arg1, i16 %arg2) {
+entry:
+ %A = mul i16 %arg1, %arg2
+ ret i16 %A
+}
+
+define i8 @mul_i8_1(i8 %arg1, i8 %arg2) {
+entry:
+ %A = mul i8 %arg2, %arg1
+ ret i8 %A
+}
+
+define i8 @mul_i8_2(i8 %arg1, i8 %arg2) {
+entry:
+ %A = mul i8 %arg1, %arg2
+ ret i8 %A
+}
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
new file mode 100644
index 000000000000..841a3ec54d6f
--- /dev/null
+++ b/test/CodeGen/CellSPU/nand.ll
@@ -0,0 +1,121 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep nand %t1.s | count 90
+; RUN: grep and %t1.s | count 94
+; RUN: grep xsbh %t1.s | count 2
+; RUN: grep xshw %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1]
+ %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %B
+}
+
+define <4 x i32> @nand_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
+ %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %B
+}
+
+define <8 x i16> @nand_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1]
+ %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <8 x i16> %B
+}
+
+define <8 x i16> @nand_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = and <8 x i16> %arg1, %arg2 ; <<8 x i16>> [#uses=1]
+ %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <8 x i16> %B
+}
+
+define <16 x i8> @nand_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1]
+ %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <16 x i8> %B
+}
+
+define <16 x i8> @nand_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = and <16 x i8> %arg1, %arg2 ; <<16 x i8>> [#uses=1]
+ %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <16 x i8> %B
+}
+
+define i32 @nand_i32_1(i32 %arg1, i32 %arg2) {
+ %A = and i32 %arg2, %arg1 ; <i32> [#uses=1]
+ %B = xor i32 %A, -1 ; <i32> [#uses=1]
+ ret i32 %B
+}
+
+define i32 @nand_i32_2(i32 %arg1, i32 %arg2) {
+ %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %B = xor i32 %A, -1 ; <i32> [#uses=1]
+ ret i32 %B
+}
+
+define i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) signext {
+ %A = and i16 %arg2, %arg1 ; <i16> [#uses=1]
+ %B = xor i16 %A, -1 ; <i16> [#uses=1]
+ ret i16 %B
+}
+
+define i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) signext {
+ %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
+ %B = xor i16 %A, -1 ; <i16> [#uses=1]
+ ret i16 %B
+}
+
+define i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
+ %A = and i16 %arg2, %arg1 ; <i16> [#uses=1]
+ %B = xor i16 %A, -1 ; <i16> [#uses=1]
+ ret i16 %B
+}
+
+define i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
+ %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
+ %B = xor i16 %A, -1 ; <i16> [#uses=1]
+ ret i16 %B
+}
+
+define i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+ %A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
+ %B = xor i8 %A, -1 ; <i8> [#uses=1]
+ ret i8 %B
+}
+
+define i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+ %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %B = xor i8 %A, -1 ; <i8> [#uses=1]
+ ret i8 %B
+}
+
+define i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) signext {
+ %A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
+ %B = xor i8 %A, -1 ; <i8> [#uses=1]
+ ret i8 %B
+}
+
+define i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) signext {
+ %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %B = xor i8 %A, -1 ; <i8> [#uses=1]
+ ret i8 %B
+}
+
+define i8 @nand_i8_3(i8 %arg1, i8 %arg2) {
+ %A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
+ %B = xor i8 %A, -1 ; <i8> [#uses=1]
+ ret i8 %B
+}
+
+define i8 @nand_i8_4(i8 %arg1, i8 %arg2) {
+ %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
+ %B = xor i8 %A, -1 ; <i8> [#uses=1]
+ ret i8 %B
+}
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
new file mode 100644
index 000000000000..4e9da8f12972
--- /dev/null
+++ b/test/CodeGen/CellSPU/or_ops.ll
@@ -0,0 +1,264 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep and %t1.s | count 2
+; RUN: grep orc %t1.s | count 85
+; RUN: grep ori %t1.s | count 30
+; RUN: grep orhi %t1.s | count 30
+; RUN: grep orbi %t1.s | count 15
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; OR instruction generation:
+define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = or <4 x i32> %arg1, %arg2
+ ret <4 x i32> %A
+}
+
+define <4 x i32> @or_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = or <4 x i32> %arg2, %arg1
+ ret <4 x i32> %A
+}
+
+define <8 x i16> @or_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = or <8 x i16> %arg1, %arg2
+ ret <8 x i16> %A
+}
+
+define <8 x i16> @or_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = or <8 x i16> %arg2, %arg1
+ ret <8 x i16> %A
+}
+
+define <16 x i8> @or_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = or <16 x i8> %arg2, %arg1
+ ret <16 x i8> %A
+}
+
+define <16 x i8> @or_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = or <16 x i8> %arg1, %arg2
+ ret <16 x i8> %A
+}
+
+define i32 @or_i32_1(i32 %arg1, i32 %arg2) {
+ %A = or i32 %arg2, %arg1
+ ret i32 %A
+}
+
+define i32 @or_i32_2(i32 %arg1, i32 %arg2) {
+ %A = or i32 %arg1, %arg2
+ ret i32 %A
+}
+
+define i16 @or_i16_1(i16 %arg1, i16 %arg2) {
+ %A = or i16 %arg2, %arg1
+ ret i16 %A
+}
+
+define i16 @or_i16_2(i16 %arg1, i16 %arg2) {
+ %A = or i16 %arg1, %arg2
+ ret i16 %A
+}
+
+define i8 @or_i8_1(i8 %arg1, i8 %arg2) {
+ %A = or i8 %arg2, %arg1
+ ret i8 %A
+}
+
+define i8 @or_i8_2(i8 %arg1, i8 %arg2) {
+ %A = or i8 %arg1, %arg2
+ ret i8 %A
+}
+
+; ORC instruction generation:
+define <4 x i32> @orc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %B = or <4 x i32> %arg1, %A
+ ret <4 x i32> %B
+}
+
+define <4 x i32> @orc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %B = or <4 x i32> %arg2, %A
+ ret <4 x i32> %B
+}
+
+define <4 x i32> @orc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %B = or <4 x i32> %A, %arg2
+ ret <4 x i32> %B
+}
+
+define <8 x i16> @orc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = or <8 x i16> %arg1, %A
+ ret <8 x i16> %B
+}
+
+define <8 x i16> @orc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = or <8 x i16> %arg2, %A
+ ret <8 x i16> %B
+}
+
+define <16 x i8> @orc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = or <16 x i8> %arg2, %A
+ ret <16 x i8> %B
+}
+
+define <16 x i8> @orc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = or <16 x i8> %arg1, %A
+ ret <16 x i8> %B
+}
+
+define <16 x i8> @orc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = or <16 x i8> %A, %arg1
+ ret <16 x i8> %B
+}
+
+define i32 @orc_i32_1(i32 %arg1, i32 %arg2) {
+ %A = xor i32 %arg2, -1
+ %B = or i32 %A, %arg1
+ ret i32 %B
+}
+
+define i32 @orc_i32_2(i32 %arg1, i32 %arg2) {
+ %A = xor i32 %arg1, -1
+ %B = or i32 %A, %arg2
+ ret i32 %B
+}
+
+define i32 @orc_i32_3(i32 %arg1, i32 %arg2) {
+ %A = xor i32 %arg2, -1
+ %B = or i32 %arg1, %A
+ ret i32 %B
+}
+
+define i16 @orc_i16_1(i16 %arg1, i16 %arg2) {
+ %A = xor i16 %arg2, -1
+ %B = or i16 %A, %arg1
+ ret i16 %B
+}
+
+define i16 @orc_i16_2(i16 %arg1, i16 %arg2) {
+ %A = xor i16 %arg1, -1
+ %B = or i16 %A, %arg2
+ ret i16 %B
+}
+
+define i16 @orc_i16_3(i16 %arg1, i16 %arg2) {
+ %A = xor i16 %arg2, -1
+ %B = or i16 %arg1, %A
+ ret i16 %B
+}
+
+define i8 @orc_i8_1(i8 %arg1, i8 %arg2) {
+ %A = xor i8 %arg2, -1
+ %B = or i8 %A, %arg1
+ ret i8 %B
+}
+
+define i8 @orc_i8_2(i8 %arg1, i8 %arg2) {
+ %A = xor i8 %arg1, -1
+ %B = or i8 %A, %arg2
+ ret i8 %B
+}
+
+define i8 @orc_i8_3(i8 %arg1, i8 %arg2) {
+ %A = xor i8 %arg2, -1
+ %B = or i8 %arg1, %A
+ ret i8 %B
+}
+
+; ORI instruction generation (i32 data type):
+define <4 x i32> @ori_v4i32_1(<4 x i32> %in) {
+ %tmp2 = or <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @ori_v4i32_2(<4 x i32> %in) {
+ %tmp2 = or <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @ori_v4i32_3(<4 x i32> %in) {
+ %tmp2 = or <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @ori_v4i32_4(<4 x i32> %in) {
+ %tmp2 = or <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 >
+ ret <4 x i32> %tmp2
+}
+
+define i32 @ori_u32(i32 zeroext %in) zeroext {
+ %tmp37 = or i32 %in, 37 ; <i32> [#uses=1]
+ ret i32 %tmp37
+}
+
+define i32 @ori_i32(i32 signext %in) signext {
+ %tmp38 = or i32 %in, 37 ; <i32> [#uses=1]
+ ret i32 %tmp38
+}
+
+; ORHI instruction generation (i16 data type):
+define <8 x i16> @orhi_v8i16_1(<8 x i16> %in) {
+ %tmp2 = or <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511,
+ i16 511, i16 511, i16 511, i16 511 >
+ ret <8 x i16> %tmp2
+}
+
+define <8 x i16> @orhi_v8i16_2(<8 x i16> %in) {
+ %tmp2 = or <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510,
+ i16 510, i16 510, i16 510, i16 510 >
+ ret <8 x i16> %tmp2
+}
+
+define <8 x i16> @orhi_v8i16_3(<8 x i16> %in) {
+ %tmp2 = or <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1 >
+ ret <8 x i16> %tmp2
+}
+
+define <8 x i16> @orhi_v8i16_4(<8 x i16> %in) {
+ %tmp2 = or <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512,
+ i16 -512, i16 -512, i16 -512, i16 -512 >
+ ret <8 x i16> %tmp2
+}
+
+define i16 @orhi_u16(i16 zeroext %in) zeroext {
+ %tmp37 = or i16 %in, 37 ; <i16> [#uses=1]
+ ret i16 %tmp37
+}
+
+define i16 @orhi_i16(i16 signext %in) signext {
+ %tmp38 = or i16 %in, 37 ; <i16> [#uses=1]
+ ret i16 %tmp38
+}
+
+; ORBI instruction generation (i8 data type):
+define <16 x i8> @orbi_v16i8(<16 x i8> %in) {
+ %tmp2 = or <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
+ i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
+ i8 42, i8 42, i8 42, i8 42 >
+ ret <16 x i8> %tmp2
+}
+
+define i8 @orbi_u8(i8 zeroext %in) zeroext {
+ %tmp37 = or i8 %in, 37 ; <i8> [#uses=1]
+ ret i8 %tmp37
+}
+
+define i8 @orbi_i8(i8 signext %in) signext {
+ %tmp38 = or i8 %in, 37 ; <i8> [#uses=1]
+ ret i8 %tmp38
+}
diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll
new file mode 100644
index 000000000000..91567ce82803
--- /dev/null
+++ b/test/CodeGen/CellSPU/private.ll
@@ -0,0 +1,22 @@
+; Test to make sure that 'private' linkage is handled correctly.
+;
+; RUN: llvm-as < %s | llc -march=cellspu > %t
+; RUN: grep .Lfoo: %t
+; RUN: grep brsl.*\.Lfoo %t
+; RUN: grep .Lbaz: %t
+; RUN: grep ila.*\.Lbaz %t
+
+
+declare void @foo()
+
+define private void @foo() {
+ ret void
+}
+
+@baz = private global i32 4;
+
+define i32 @bar() {
+ call void @foo()
+ %1 = load i32* @baz, align 4
+ ret i32 %1
+}
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
new file mode 100644
index 000000000000..e308172486a5
--- /dev/null
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -0,0 +1,160 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu -f -o %t1.s
+; RUN: grep rot %t1.s | count 85
+; RUN: grep roth %t1.s | count 8
+; RUN: grep roti.*5 %t1.s | count 1
+; RUN: grep roti.*27 %t1.s | count 1
+; RUN grep rothi.*5 %t1.s | count 2
+; RUN grep rothi.*11 %t1.s | count 1
+; RUN grep rothi.*,.3 %t1.s | count 1
+; RUN: grep andhi %t1.s | count 4
+; RUN: grep shlhi %t1.s | count 4
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; Vector rotates are not currently supported in gcc or llvm assembly. These are
+; not tested.
+
+; 32-bit rotates:
+define i32 @rotl32_1a(i32 %arg1, i8 %arg2) {
+ %tmp1 = zext i8 %arg2 to i32 ; <i32> [#uses=1]
+ %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1]
+ %arg22 = sub i8 32, %arg2 ; <i8> [#uses=1]
+ %tmp2 = zext i8 %arg22 to i32 ; <i32> [#uses=1]
+ %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @rotl32_1b(i32 %arg1, i16 %arg2) {
+ %tmp1 = zext i16 %arg2 to i32 ; <i32> [#uses=1]
+ %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1]
+        %arg22 = sub i16 32, %arg2      ; <i16> [#uses=1]
+ %tmp2 = zext i16 %arg22 to i32 ; <i32> [#uses=1]
+ %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @rotl32_2(i32 %arg1, i32 %arg2) {
+ %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1]
+ %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @rotl32_3(i32 %arg1, i32 %arg2) {
+ %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1]
+ %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @rotl32_4(i32 %arg1, i32 %arg2) {
+ %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1]
+ %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1]
+ %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @rotr32_1(i32 %A, i8 %Amt) {
+ %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
+ %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1]
+ %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1]
+ %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1]
+ %C = shl i32 %A, %tmp2 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @rotr32_2(i32 %A, i8 %Amt) {
+ %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1]
+ %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
+ %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1]
+ %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1]
+ %C = shl i32 %A, %tmp2 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+; Rotate left with immediate
+define i32 @rotli32(i32 %A) {
+ %B = shl i32 %A, 5 ; <i32> [#uses=1]
+ %C = lshr i32 %A, 27 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+; Rotate right with immediate
+define i32 @rotri32(i32 %A) {
+ %B = lshr i32 %A, 5 ; <i32> [#uses=1]
+ %C = shl i32 %A, 27 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+; 16-bit rotates:
+define i16 @rotr16_1(i16 %arg1, i8 %arg) {
+ %tmp1 = zext i8 %arg to i16 ; <i16> [#uses=1]
+ %B = lshr i16 %arg1, %tmp1 ; <i16> [#uses=1]
+ %arg2 = sub i8 16, %arg ; <i8> [#uses=1]
+ %tmp2 = zext i8 %arg2 to i16 ; <i16> [#uses=1]
+ %C = shl i16 %arg1, %tmp2 ; <i16> [#uses=1]
+ %D = or i16 %B, %C ; <i16> [#uses=1]
+ ret i16 %D
+}
+
+define i16 @rotr16_2(i16 %arg1, i16 %arg) {
+ %B = lshr i16 %arg1, %arg ; <i16> [#uses=1]
+ %tmp1 = sub i16 16, %arg ; <i16> [#uses=1]
+ %C = shl i16 %arg1, %tmp1 ; <i16> [#uses=1]
+ %D = or i16 %B, %C ; <i16> [#uses=1]
+ ret i16 %D
+}
+
+define i16 @rotli16(i16 %A) {
+ %B = shl i16 %A, 5 ; <i16> [#uses=1]
+ %C = lshr i16 %A, 11 ; <i16> [#uses=1]
+ %D = or i16 %B, %C ; <i16> [#uses=1]
+ ret i16 %D
+}
+
+define i16 @rotri16(i16 %A) {
+ %B = lshr i16 %A, 5 ; <i16> [#uses=1]
+ %C = shl i16 %A, 11 ; <i16> [#uses=1]
+ %D = or i16 %B, %C ; <i16> [#uses=1]
+ ret i16 %D
+}
+
+define i8 @rotl8(i8 %A, i8 %Amt) {
+ %B = shl i8 %A, %Amt ; <i8> [#uses=1]
+ %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1]
+ %C = lshr i8 %A, %Amt2 ; <i8> [#uses=1]
+ %D = or i8 %B, %C ; <i8> [#uses=1]
+ ret i8 %D
+}
+
+define i8 @rotr8(i8 %A, i8 %Amt) {
+ %B = lshr i8 %A, %Amt ; <i8> [#uses=1]
+ %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1]
+ %C = shl i8 %A, %Amt2 ; <i8> [#uses=1]
+ %D = or i8 %B, %C ; <i8> [#uses=1]
+ ret i8 %D
+}
+
+define i8 @rotli8(i8 %A) {
+ %B = shl i8 %A, 5 ; <i8> [#uses=1]
+ %C = lshr i8 %A, 3 ; <i8> [#uses=1]
+ %D = or i8 %B, %C ; <i8> [#uses=1]
+ ret i8 %D
+}
+
+define i8 @rotri8(i8 %A) {
+ %B = lshr i8 %A, 5 ; <i8> [#uses=1]
+ %C = shl i8 %A, 3 ; <i8> [#uses=1]
+ %D = or i8 %B, %C ; <i8> [#uses=1]
+ ret i8 %D
+}
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
new file mode 100644
index 000000000000..e83e47606c28
--- /dev/null
+++ b/test/CodeGen/CellSPU/select_bits.ll
@@ -0,0 +1,569 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep selb %t1.s | count 56
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+; v2i64
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+; (or (and rC, rB), (and (not rC), rA))
+define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
+ %C = and <2 x i64> %rC, %rB
+ %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
+ %B = and <2 x i64> %A, %rA
+ %D = or <2 x i64> %C, %B
+ ret <2 x i64> %D
+}
+
+; (or (and rB, rC), (and (not rC), rA))
+define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
+ %C = and <2 x i64> %rB, %rC
+ %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
+ %B = and <2 x i64> %A, %rA
+ %D = or <2 x i64> %C, %B
+ ret <2 x i64> %D
+}
+
+; (or (and (not rC), rA), (and rB, rC))
+define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
+ %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
+ %B = and <2 x i64> %A, %rA
+ %C = and <2 x i64> %rB, %rC
+ %D = or <2 x i64> %C, %B
+ ret <2 x i64> %D
+}
+
+; (or (and (not rC), rA), (and rC, rB))
+define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
+ %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
+ %B = and <2 x i64> %A, %rA
+ %C = and <2 x i64> %rC, %rB
+ %D = or <2 x i64> %C, %B
+ ret <2 x i64> %D
+}
+
+; (or (and rC, rB), (and rA, (not rC)))
+define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
+ %C = and <2 x i64> %rC, %rB
+ %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
+ %B = and <2 x i64> %rA, %A
+ %D = or <2 x i64> %C, %B
+ ret <2 x i64> %D
+}
+
+; (or (and rB, rC), (and rA, (not rC)))
+define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
+ %C = and <2 x i64> %rB, %rC
+ %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
+ %B = and <2 x i64> %rA, %A
+ %D = or <2 x i64> %C, %B
+ ret <2 x i64> %D
+}
+
+; (or (and rA, (not rC)), (and rB, rC))
+define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
+ %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
+ %B = and <2 x i64> %rA, %A
+ %C = and <2 x i64> %rB, %rC
+ %D = or <2 x i64> %C, %B
+ ret <2 x i64> %D
+}
+
+; (or (and rA, (not rC)), (and rC, rB))
+define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
+ %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
+ %B = and <2 x i64> %rA, %A
+ %C = and <2 x i64> %rC, %rB
+ %D = or <2 x i64> %C, %B
+ ret <2 x i64> %D
+}
+
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+; v4i32
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+; (or (and rC, rB), (and (not rC), rA))
+define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
+ %C = and <4 x i32> %rC, %rB
+ %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %B = and <4 x i32> %A, %rA
+ %D = or <4 x i32> %C, %B
+ ret <4 x i32> %D
+}
+
+; (or (and rB, rC), (and (not rC), rA))
+define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
+ %C = and <4 x i32> %rB, %rC
+ %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %B = and <4 x i32> %A, %rA
+ %D = or <4 x i32> %C, %B
+ ret <4 x i32> %D
+}
+
+; (or (and (not rC), rA), (and rB, rC))
+define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
+ %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %B = and <4 x i32> %A, %rA
+ %C = and <4 x i32> %rB, %rC
+ %D = or <4 x i32> %C, %B
+ ret <4 x i32> %D
+}
+
+; (or (and (not rC), rA), (and rC, rB))
+define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
+ %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
+ %B = and <4 x i32> %A, %rA
+ %C = and <4 x i32> %rC, %rB
+ %D = or <4 x i32> %C, %B
+ ret <4 x i32> %D
+}
+
+; (or (and rC, rB), (and rA, (not rC)))
+define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
+ %C = and <4 x i32> %rC, %rB
+ %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
+ %B = and <4 x i32> %rA, %A
+ %D = or <4 x i32> %C, %B
+ ret <4 x i32> %D
+}
+
+; (or (and rB, rC), (and rA, (not rC)))
+define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
+ %C = and <4 x i32> %rB, %rC
+ %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
+ %B = and <4 x i32> %rA, %A
+ %D = or <4 x i32> %C, %B
+ ret <4 x i32> %D
+}
+
+; (or (and rA, (not rC)), (and rB, rC))
+define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
+ %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
+ %B = and <4 x i32> %rA, %A
+ %C = and <4 x i32> %rB, %rC
+ %D = or <4 x i32> %C, %B
+ ret <4 x i32> %D
+}
+
+; (or (and rA, (not rC)), (and rC, rB))
+define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
+ %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
+ %B = and <4 x i32> %rA, %A
+ %C = and <4 x i32> %rC, %rB
+ %D = or <4 x i32> %C, %B
+ ret <4 x i32> %D
+}
+
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+; v8i16
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+; (or (and rC, rB), (and (not rC), rA))
+define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
+ %C = and <8 x i16> %rC, %rB
+ %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %A, %rA
+ %D = or <8 x i16> %C, %B
+ ret <8 x i16> %D
+}
+
+; (or (and rB, rC), (and (not rC), rA))
+define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
+ %C = and <8 x i16> %rB, %rC
+ %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %A, %rA
+ %D = or <8 x i16> %C, %B
+ ret <8 x i16> %D
+}
+
+; (or (and (not rC), rA), (and rB, rC))
+define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
+ %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %A, %rA
+ %C = and <8 x i16> %rB, %rC
+ %D = or <8 x i16> %C, %B
+ ret <8 x i16> %D
+}
+
+; (or (and (not rC), rA), (and rC, rB))
+define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
+ %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %A, %rA
+ %C = and <8 x i16> %rC, %rB
+ %D = or <8 x i16> %C, %B
+ ret <8 x i16> %D
+}
+
+; (or (and rC, rB), (and rA, (not rC)))
+define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
+ %C = and <8 x i16> %rC, %rB
+ %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %rA, %A
+ %D = or <8 x i16> %C, %B
+ ret <8 x i16> %D
+}
+
+; (or (and rB, rC), (and rA, (not rC)))
+define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
+ %C = and <8 x i16> %rB, %rC
+ %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %rA, %A
+ %D = or <8 x i16> %C, %B
+ ret <8 x i16> %D
+}
+
+; (or (and rA, (not rC)), (and rB, rC))
+define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
+ %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %rA, %A
+ %C = and <8 x i16> %rB, %rC
+ %D = or <8 x i16> %C, %B
+ ret <8 x i16> %D
+}
+
+; (or (and rA, (not rC)), (and rC, rB))
+define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
+ %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1 >
+ %B = and <8 x i16> %rA, %A
+ %C = and <8 x i16> %rC, %rB
+ %D = or <8 x i16> %C, %B
+ ret <8 x i16> %D
+}
+
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+; v16i8
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+; (or (and rC, rB), (and (not rC), rA))
+define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
+ %C = and <16 x i8> %rC, %rB
+ %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %A, %rA
+ %D = or <16 x i8> %C, %B
+ ret <16 x i8> %D
+}
+
+; (or (and rB, rC), (and (not rC), rA))
+define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
+ %C = and <16 x i8> %rB, %rC
+ %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %A, %rA
+ %D = or <16 x i8> %C, %B
+ ret <16 x i8> %D
+}
+
+; (or (and (not rC), rA), (and rB, rC))
+define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
+ %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %A, %rA
+ %C = and <16 x i8> %rB, %rC
+ %D = or <16 x i8> %C, %B
+ ret <16 x i8> %D
+}
+
+; (or (and (not rC), rA), (and rC, rB))
+define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
+ %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %A, %rA
+ %C = and <16 x i8> %rC, %rB
+ %D = or <16 x i8> %C, %B
+ ret <16 x i8> %D
+}
+
+; (or (and rC, rB), (and rA, (not rC)))
+define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
+ %C = and <16 x i8> %rC, %rB
+ %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %rA, %A
+ %D = or <16 x i8> %C, %B
+ ret <16 x i8> %D
+}
+
+; (or (and rB, rC), (and rA, (not rC)))
+define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
+ %C = and <16 x i8> %rB, %rC
+ %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %rA, %A
+ %D = or <16 x i8> %C, %B
+ ret <16 x i8> %D
+}
+
+; (or (and rA, (not rC)), (and rB, rC))
+define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
+ %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %rA, %A
+ %C = and <16 x i8> %rB, %rC
+ %D = or <16 x i8> %C, %B
+ ret <16 x i8> %D
+}
+
+; (or (and rA, (not rC)), (and rC, rB))
+define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
+ %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1 >
+ %B = and <16 x i8> %rA, %A
+ %C = and <16 x i8> %rC, %rB
+ %D = or <16 x i8> %C, %B
+ ret <16 x i8> %D
+}
+
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+; i32
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+; (or (and rC, rB), (and (not rC), rA))
+define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) {
+ %C = and i32 %rC, %rB
+ %A = xor i32 %rC, -1
+ %B = and i32 %A, %rA
+ %D = or i32 %C, %B
+ ret i32 %D
+}
+
+; (or (and rB, rC), (and (not rC), rA))
+define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) {
+ %C = and i32 %rB, %rC
+ %A = xor i32 %rC, -1
+ %B = and i32 %A, %rA
+ %D = or i32 %C, %B
+ ret i32 %D
+}
+
+; (or (and (not rC), rA), (and rB, rC))
+define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) {
+ %A = xor i32 %rC, -1
+ %B = and i32 %A, %rA
+ %C = and i32 %rB, %rC
+ %D = or i32 %C, %B
+ ret i32 %D
+}
+
+; (or (and (not rC), rA), (and rC, rB))
+define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) {
+ %A = xor i32 %rC, -1
+ %B = and i32 %A, %rA
+ %C = and i32 %rC, %rB
+ %D = or i32 %C, %B
+ ret i32 %D
+}
+
+; (or (and rC, rB), (and rA, (not rC)))
+define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) {
+ %C = and i32 %rC, %rB
+ %A = xor i32 %rC, -1
+ %B = and i32 %rA, %A
+ %D = or i32 %C, %B
+ ret i32 %D
+}
+
+; (or (and rB, rC), (and rA, (not rC)))
+define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) {
+ %C = and i32 %rB, %rC
+ %A = xor i32 %rC, -1
+ %B = and i32 %rA, %A
+ %D = or i32 %C, %B
+ ret i32 %D
+}
+
+; (or (and rA, (not rC)), (and rB, rC))
+define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) {
+ %A = xor i32 %rC, -1
+ %B = and i32 %rA, %A
+ %C = and i32 %rB, %rC
+ %D = or i32 %C, %B
+ ret i32 %D
+}
+
+; (or (and rA, (not rC)), (and rC, rB))
+define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) {
+ %A = xor i32 %rC, -1
+ %B = and i32 %rA, %A
+ %C = and i32 %rC, %rB
+ %D = or i32 %C, %B
+ ret i32 %D
+}
+
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+; i16
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+; (or (and rC, rB), (and (not rC), rA))
+define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) {
+ %C = and i16 %rC, %rB
+ %A = xor i16 %rC, -1
+ %B = and i16 %A, %rA
+ %D = or i16 %C, %B
+ ret i16 %D
+}
+
+; (or (and rB, rC), (and (not rC), rA))
+define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) {
+ %C = and i16 %rB, %rC
+ %A = xor i16 %rC, -1
+ %B = and i16 %A, %rA
+ %D = or i16 %C, %B
+ ret i16 %D
+}
+
+; (or (and (not rC), rA), (and rB, rC))
+define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) {
+ %A = xor i16 %rC, -1
+ %B = and i16 %A, %rA
+ %C = and i16 %rB, %rC
+ %D = or i16 %C, %B
+ ret i16 %D
+}
+
+; (or (and (not rC), rA), (and rC, rB))
+define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) {
+ %A = xor i16 %rC, -1
+ %B = and i16 %A, %rA
+ %C = and i16 %rC, %rB
+ %D = or i16 %C, %B
+ ret i16 %D
+}
+
+; (or (and rC, rB), (and rA, (not rC)))
+define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) {
+ %C = and i16 %rC, %rB
+ %A = xor i16 %rC, -1
+ %B = and i16 %rA, %A
+ %D = or i16 %C, %B
+ ret i16 %D
+}
+
+; (or (and rB, rC), (and rA, (not rC)))
+define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) {
+ %C = and i16 %rB, %rC
+ %A = xor i16 %rC, -1
+ %B = and i16 %rA, %A
+ %D = or i16 %C, %B
+ ret i16 %D
+}
+
+; (or (and rA, (not rC)), (and rB, rC))
+define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) {
+ %A = xor i16 %rC, -1
+ %B = and i16 %rA, %A
+ %C = and i16 %rB, %rC
+ %D = or i16 %C, %B
+ ret i16 %D
+}
+
+; (or (and rA, (not rC)), (and rC, rB))
+define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) {
+ %A = xor i16 %rC, -1
+ %B = and i16 %rA, %A
+ %C = and i16 %rC, %rB
+ %D = or i16 %C, %B
+ ret i16 %D
+}
+
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+; i8
+;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+; (or (and rC, rB), (and (not rC), rA))
+define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) {
+ %C = and i8 %rC, %rB
+ %A = xor i8 %rC, -1
+ %B = and i8 %A, %rA
+ %D = or i8 %C, %B
+ ret i8 %D
+}
+
+; (or (and rB, rC), (and (not rC), rA))
+define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) {
+ %C = and i8 %rB, %rC
+ %A = xor i8 %rC, -1
+ %B = and i8 %A, %rA
+ %D = or i8 %C, %B
+ ret i8 %D
+}
+
+; (or (and (not rC), rA), (and rB, rC))
+define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) {
+ %A = xor i8 %rC, -1
+ %B = and i8 %A, %rA
+ %C = and i8 %rB, %rC
+ %D = or i8 %C, %B
+ ret i8 %D
+}
+
+; (or (and (not rC), rA), (and rC, rB))
+define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) {
+ %A = xor i8 %rC, -1
+ %B = and i8 %A, %rA
+ %C = and i8 %rC, %rB
+ %D = or i8 %C, %B
+ ret i8 %D
+}
+
+; (or (and rC, rB), (and rA, (not rC)))
+define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) {
+ %C = and i8 %rC, %rB
+ %A = xor i8 %rC, -1
+ %B = and i8 %rA, %A
+ %D = or i8 %C, %B
+ ret i8 %D
+}
+
+; (or (and rB, rC), (and rA, (not rC)))
+define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) {
+ %C = and i8 %rB, %rC
+ %A = xor i8 %rC, -1
+ %B = and i8 %rA, %A
+ %D = or i8 %C, %B
+ ret i8 %D
+}
+
+; (or (and rA, (not rC)), (and rB, rC))
+define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) {
+ %A = xor i8 %rC, -1
+ %B = and i8 %rA, %A
+ %C = and i8 %rB, %rC
+ %D = or i8 %C, %B
+ ret i8 %D
+}
+
+; (or (and rA, (not rC)), (and rC, rB))
+define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) {
+ %A = xor i8 %rC, -1
+ %B = and i8 %rA, %A
+ %C = and i8 %rC, %rB
+ %D = or i8 %C, %B
+ ret i8 %D
+}
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
new file mode 100644
index 000000000000..3c26baa7c7ab
--- /dev/null
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -0,0 +1,283 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep {shlh } %t1.s | count 9
+; RUN: grep {shlhi } %t1.s | count 3
+; RUN: grep {shl } %t1.s | count 9
+; RUN: grep {shli } %t1.s | count 3
+; RUN: grep {xshw } %t1.s | count 5
+; RUN: grep {and } %t1.s | count 5
+; RUN: grep {andi } %t1.s | count 2
+; RUN: grep {rotmi } %t1.s | count 2
+; RUN: grep {rotqmbyi } %t1.s | count 1
+; RUN: grep {rotqmbii } %t1.s | count 2
+; RUN: grep {rotqmby } %t1.s | count 1
+; RUN: grep {rotqmbi } %t1.s | count 1
+; RUN: grep {rotqbyi } %t1.s | count 1
+; RUN: grep {rotqbii } %t1.s | count 2
+; RUN: grep {rotqbybi } %t1.s | count 1
+; RUN: grep {sfi } %t1.s | count 3
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; Vector shifts are not currently supported in gcc or llvm assembly. These are
+; not tested.
+
+; Shift left i16 via register; note that the second operand to shl is promoted
+; to a 32-bit type:
+
+define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) {
+ %A = shl i16 %arg1, %arg2
+ ret i16 %A
+}
+
+define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) {
+ %A = shl i16 %arg2, %arg1
+ ret i16 %A
+}
+
+define i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) signext {
+ %A = shl i16 %arg1, %arg2
+ ret i16 %A
+}
+
+define i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) signext {
+ %A = shl i16 %arg2, %arg1
+ ret i16 %A
+}
+
+define i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
+ %A = shl i16 %arg1, %arg2
+ ret i16 %A
+}
+
+define i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
+ %A = shl i16 %arg2, %arg1
+ ret i16 %A
+}
+
+; Shift left i16 with immediate:
+define i16 @shlhi_i16_1(i16 %arg1) {
+ %A = shl i16 %arg1, 12
+ ret i16 %A
+}
+
+; Should not generate anything other than the return, arg1 << 0 = arg1
+define i16 @shlhi_i16_2(i16 %arg1) {
+ %A = shl i16 %arg1, 0
+ ret i16 %A
+}
+
+define i16 @shlhi_i16_3(i16 %arg1) {
+ %A = shl i16 16383, %arg1
+ ret i16 %A
+}
+
+; Should generate 0, 0 << arg1 = 0
+define i16 @shlhi_i16_4(i16 %arg1) {
+ %A = shl i16 0, %arg1
+ ret i16 %A
+}
+
+define i16 @shlhi_i16_5(i16 signext %arg1) signext {
+ %A = shl i16 %arg1, 12
+ ret i16 %A
+}
+
+; Should not generate anything other than the return, arg1 << 0 = arg1
+define i16 @shlhi_i16_6(i16 signext %arg1) signext {
+ %A = shl i16 %arg1, 0
+ ret i16 %A
+}
+
+define i16 @shlhi_i16_7(i16 signext %arg1) signext {
+ %A = shl i16 16383, %arg1
+ ret i16 %A
+}
+
+; Should generate 0, 0 << arg1 = 0
+define i16 @shlhi_i16_8(i16 signext %arg1) signext {
+ %A = shl i16 0, %arg1
+ ret i16 %A
+}
+
+define i16 @shlhi_i16_9(i16 zeroext %arg1) zeroext {
+ %A = shl i16 %arg1, 12
+ ret i16 %A
+}
+
+; Should not generate anything other than the return, arg1 << 0 = arg1
+define i16 @shlhi_i16_10(i16 zeroext %arg1) zeroext {
+ %A = shl i16 %arg1, 0
+ ret i16 %A
+}
+
+define i16 @shlhi_i16_11(i16 zeroext %arg1) zeroext {
+ %A = shl i16 16383, %arg1
+ ret i16 %A
+}
+
+; Should generate 0, 0 << arg1 = 0
+define i16 @shlhi_i16_12(i16 zeroext %arg1) zeroext {
+ %A = shl i16 0, %arg1
+ ret i16 %A
+}
+
+; Shift left i32 via register; the shift amount is already a 32-bit type, so
+; no promotion is needed:
+
+define i32 @shl_i32_1(i32 %arg1, i32 %arg2) {
+ %A = shl i32 %arg1, %arg2
+ ret i32 %A
+}
+
+define i32 @shl_i32_2(i32 %arg1, i32 %arg2) {
+ %A = shl i32 %arg2, %arg1
+ ret i32 %A
+}
+
+define i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) signext {
+ %A = shl i32 %arg1, %arg2
+ ret i32 %A
+}
+
+define i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) signext {
+ %A = shl i32 %arg2, %arg1
+ ret i32 %A
+}
+
+define i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) zeroext {
+ %A = shl i32 %arg1, %arg2
+ ret i32 %A
+}
+
+define i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) zeroext {
+ %A = shl i32 %arg2, %arg1
+ ret i32 %A
+}
+
+; Shift left i32 with immediate:
+define i32 @shli_i32_1(i32 %arg1) {
+ %A = shl i32 %arg1, 12
+ ret i32 %A
+}
+
+; Should not generate anything other than the return, arg1 << 0 = arg1
+define i32 @shli_i32_2(i32 %arg1) {
+ %A = shl i32 %arg1, 0
+ ret i32 %A
+}
+
+define i32 @shli_i32_3(i32 %arg1) {
+ %A = shl i32 16383, %arg1
+ ret i32 %A
+}
+
+; Should generate 0, 0 << arg1 = 0
+define i32 @shli_i32_4(i32 %arg1) {
+ %A = shl i32 0, %arg1
+ ret i32 %A
+}
+
+define i32 @shli_i32_5(i32 signext %arg1) signext {
+ %A = shl i32 %arg1, 12
+ ret i32 %A
+}
+
+; Should not generate anything other than the return, arg1 << 0 = arg1
+define i32 @shli_i32_6(i32 signext %arg1) signext {
+ %A = shl i32 %arg1, 0
+ ret i32 %A
+}
+
+define i32 @shli_i32_7(i32 signext %arg1) signext {
+ %A = shl i32 16383, %arg1
+ ret i32 %A
+}
+
+; Should generate 0, 0 << arg1 = 0
+define i32 @shli_i32_8(i32 signext %arg1) signext {
+ %A = shl i32 0, %arg1
+ ret i32 %A
+}
+
+define i32 @shli_i32_9(i32 zeroext %arg1) zeroext {
+ %A = shl i32 %arg1, 12
+ ret i32 %A
+}
+
+; Should not generate anything other than the return, arg1 << 0 = arg1
+define i32 @shli_i32_10(i32 zeroext %arg1) zeroext {
+ %A = shl i32 %arg1, 0
+ ret i32 %A
+}
+
+define i32 @shli_i32_11(i32 zeroext %arg1) zeroext {
+ %A = shl i32 16383, %arg1
+ ret i32 %A
+}
+
+; Should generate 0, 0 << arg1 = 0
+define i32 @shli_i32_12(i32 zeroext %arg1) zeroext {
+ %A = shl i32 0, %arg1
+ ret i32 %A
+}
+
+;; i64 shift left
+
+define i64 @shl_i64_1(i64 %arg1) {
+ %A = shl i64 %arg1, 9
+ ret i64 %A
+}
+
+define i64 @shl_i64_2(i64 %arg1) {
+ %A = shl i64 %arg1, 3
+ ret i64 %A
+}
+
+define i64 @shl_i64_3(i64 %arg1, i32 %shift) {
+ %1 = zext i32 %shift to i64
+ %2 = shl i64 %arg1, %1
+ ret i64 %2
+}
+
+;; i64 shift right logical (zeros shift in from the most significant end)
+
+define i64 @lshr_i64_1(i64 %arg1) {
+ %1 = lshr i64 %arg1, 9
+ ret i64 %1
+}
+
+define i64 @lshr_i64_2(i64 %arg1) {
+ %1 = lshr i64 %arg1, 3
+ ret i64 %1
+}
+
+define i64 @lshr_i64_3(i64 %arg1, i32 %shift) {
+ %1 = zext i32 %shift to i64
+ %2 = lshr i64 %arg1, %1
+ ret i64 %2
+}
+
+;; i64 shift right arithmetic (copies of the sign bit shift in from the most significant end)
+
+define i64 @ashr_i64_1(i64 %arg) {
+ %1 = ashr i64 %arg, 9
+ ret i64 %1
+}
+
+define i64 @ashr_i64_2(i64 %arg) {
+ %1 = ashr i64 %arg, 3
+ ret i64 %1
+}
+
+define i64 @ashr_i64_3(i64 %arg1, i32 %shift) {
+ %1 = zext i32 %shift to i64
+ %2 = ashr i64 %arg1, %1
+ ret i64 %2
+}
+
+define i32 @hi32_i64(i64 %arg) {
+ %1 = lshr i64 %arg, 32
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll
new file mode 100644
index 000000000000..df3baef85c9d
--- /dev/null
+++ b/test/CodeGen/CellSPU/sp_farith.ll
@@ -0,0 +1,90 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu -enable-unsafe-fp-math > %t1.s
+; RUN: grep fa %t1.s | count 2
+; RUN: grep fs %t1.s | count 2
+; RUN: grep fm %t1.s | count 6
+; RUN: grep fma %t1.s | count 2
+; RUN: grep fms %t1.s | count 2
+; RUN: grep fnms %t1.s | count 3
+;
+; This file exercises the standard floating point arithmetic instructions.
+; NOTE: fdiv is tested separately, since it is a compound operation.
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define float @fp_add(float %arg1, float %arg2) {
+ %A = add float %arg1, %arg2 ; <float> [#uses=1]
+ ret float %A
+}
+
+define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) {
+ %A = add <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %A
+}
+
+define float @fp_sub(float %arg1, float %arg2) {
+ %A = sub float %arg1, %arg2 ; <float> [#uses=1]
+ ret float %A
+}
+
+define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) {
+ %A = sub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %A
+}
+
+define float @fp_mul(float %arg1, float %arg2) {
+ %A = mul float %arg1, %arg2 ; <float> [#uses=1]
+ ret float %A
+}
+
+define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) {
+ %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %A
+}
+
+define float @fp_mul_add(float %arg1, float %arg2, float %arg3) {
+ %A = mul float %arg1, %arg2 ; <float> [#uses=1]
+ %B = add float %A, %arg3 ; <float> [#uses=1]
+ ret float %B
+}
+
+define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
+ %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ %B = add <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %B
+}
+
+define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) {
+ %A = mul float %arg1, %arg2 ; <float> [#uses=1]
+ %B = sub float %A, %arg3 ; <float> [#uses=1]
+ ret float %B
+}
+
+define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
+ %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ %B = sub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %B
+}
+
+; Test the straightforward way of getting fnms
+; c - a * b
+define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) {
+ %A = mul float %arg1, %arg2
+ %B = sub float %arg3, %A
+ ret float %B
+}
+
+; Test another way of getting fnms
+; -(a * b - c) = c - a * b
+define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) {
+ %A = mul float %arg1, %arg2
+ %B = sub float %A, %arg3
+ %C = sub float -0.0, %B
+ ret float %C
+}
+
+define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
+ %A = mul <4 x float> %arg1, %arg2
+ %B = sub <4 x float> %A, %arg3
+ %D = sub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B
+ ret <4 x float> %D
+}
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
new file mode 100644
index 000000000000..f2f35ef4dbc4
--- /dev/null
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -0,0 +1,151 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep {stqd.*0(\$3)} %t1.s | count 4
+; RUN: grep {stqd.*16(\$3)} %t1.s | count 4
+; RUN: grep 16256 %t1.s | count 2
+; RUN: grep 16384 %t1.s | count 1
+; RUN: grep 771 %t1.s | count 4
+; RUN: grep 515 %t1.s | count 2
+; RUN: grep 1799 %t1.s | count 2
+; RUN: grep 1543 %t1.s | count 5
+; RUN: grep 1029 %t1.s | count 3
+; RUN: grep {shli.*, 4} %t1.s | count 4
+; RUN: grep stqx %t1.s | count 4
+; RUN: grep ilhu %t1.s | count 11
+; RUN: grep iohl %t1.s | count 8
+; RUN: grep shufb %t1.s | count 15
+; RUN: grep frds %t1.s | count 1
+
+; ModuleID = 'stores.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define void @store_v16i8_1(<16 x i8>* %a) nounwind {
+entry:
+ store <16 x i8> < i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1 >, <16 x i8>* %a
+ ret void
+}
+
+define void @store_v16i8_2(<16 x i8>* %a) nounwind {
+entry:
+ %arrayidx = getelementptr <16 x i8>* %a, i32 1
+ store <16 x i8> < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >, <16 x i8>* %arrayidx
+ ret void
+}
+
+define void @store_v16i8_3(<16 x i8>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <16 x i8>* %a, i32 %i
+ store <16 x i8> < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >, <16 x i8>* %arrayidx
+ ret void
+}
+
+define void @store_v8i16_1(<8 x i16>* %a) nounwind {
+entry:
+ store <8 x i16> < i16 1, i16 2, i16 1, i16 1, i16 1, i16 2, i16 1, i16 1 >, <8 x i16>* %a
+ ret void
+}
+
+define void @store_v8i16_2(<8 x i16>* %a) nounwind {
+entry:
+ %arrayidx = getelementptr <8 x i16>* %a, i16 1
+ store <8 x i16> < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >, <8 x i16>* %arrayidx
+ ret void
+}
+
+define void @store_v8i16_3(<8 x i16>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <8 x i16>* %a, i32 %i
+ store <8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16>* %arrayidx
+ ret void
+}
+
+define void @store_v4i32_1(<4 x i32>* %a) nounwind {
+entry:
+ store <4 x i32> < i32 1, i32 2, i32 1, i32 1 >, <4 x i32>* %a
+ ret void
+}
+
+define void @store_v4i32_2(<4 x i32>* %a) nounwind {
+entry:
+ %arrayidx = getelementptr <4 x i32>* %a, i32 1
+ store <4 x i32> < i32 2, i32 2, i32 2, i32 2 >, <4 x i32>* %arrayidx
+ ret void
+}
+
+define void @store_v4i32_3(<4 x i32>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <4 x i32>* %a, i32 %i
+ store <4 x i32> < i32 1, i32 1, i32 1, i32 1 >, <4 x i32>* %arrayidx
+ ret void
+}
+
+define void @store_v4f32_1(<4 x float>* %a) nounwind {
+entry:
+ store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a
+ ret void
+}
+
+define void @store_v4f32_2(<4 x float>* %a) nounwind {
+entry:
+ %arrayidx = getelementptr <4 x float>* %a, i32 1
+ store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx
+ ret void
+}
+
+define void @store_v4f32_3(<4 x float>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <4 x float>* %a, i32 %i
+ store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx
+ ret void
+}
+
+; Test truncating stores:
+
+define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind {
+entry:
+ %conv = trunc i16 %val to i8
+ store i8 %conv, i8* %dest
+ ret i8 %conv
+}
+
+define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind {
+entry:
+ %conv = trunc i32 %val to i8
+ store i8 %conv, i8* %dest
+ ret i8 %conv
+}
+
+define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind {
+entry:
+ %conv = trunc i32 %val to i16
+ store i16 %conv, i16* %dest
+ ret i16 %conv
+}
+
+define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind {
+entry:
+ %conv = trunc i64 %val to i8
+ store i8 %conv, i8* %dest
+ ret i8 %conv
+}
+
+define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind {
+entry:
+ %conv = trunc i64 %val to i16
+ store i16 %conv, i16* %dest
+ ret i16 %conv
+}
+
+define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind {
+entry:
+ %conv = trunc i64 %val to i32
+ store i32 %conv, i32* %dest
+ ret i32 %conv
+}
+
+define float @tstore_f64_f32(double %val, float* %dest) nounwind {
+entry:
+ %conv = fptrunc double %val to float
+ store float %conv, float* %dest
+ ret float %conv
+}
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
new file mode 100644
index 000000000000..82d319dd1050
--- /dev/null
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -0,0 +1,144 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: grep lqa %t1.s | count 5
+; RUN: grep lqd %t1.s | count 11
+; RUN: grep rotqbyi %t1.s | count 7
+; RUN: grep xshw %t1.s | count 1
+; RUN: grep andi %t1.s | count 5
+; RUN: grep cbd %t1.s | count 3
+; RUN: grep chd %t1.s | count 1
+; RUN: grep cwd %t1.s | count 3
+; RUN: grep shufb %t1.s | count 7
+; RUN: grep stqd %t1.s | count 7
+; RUN: grep iohl %t2.s | count 16
+; RUN: grep ilhu %t2.s | count 16
+; RUN: grep lqd %t2.s | count 16
+; RUN: grep rotqbyi %t2.s | count 7
+; RUN: grep xshw %t2.s | count 1
+; RUN: grep andi %t2.s | count 5
+; RUN: grep cbd %t2.s | count 3
+; RUN: grep chd %t2.s | count 1
+; RUN: grep cwd %t2.s | count 3
+; RUN: grep shufb %t2.s | count 7
+; RUN: grep stqd %t2.s | count 7
+
+; ModuleID = 'struct_1.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; struct hackstate {
+; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3)
+; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3)
+; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3)
+; int i1; // offset 4 (rotate left by 4 bytes to byte 0)
+; short s1; // offset 8 (rotate left by 6 bytes to byte 2)
+; int i2; // offset 12 [ignored]
+; unsigned char c4; // offset 16 [ignored]
+; unsigned char c5; // offset 17 [ignored]
+; unsigned char c6; // offset 18 (rotate left by 14 bytes to byte 3)
+; unsigned char c7; // offset 19 (no rotate, in preferred slot)
+; int i3; // offset 20 [ignored]
+; int i4; // offset 24 [ignored]
+; int i5; // offset 28 [ignored]
+; int i6; // offset 32 (no rotate, in preferred slot)
+; }
+%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 }
+
+; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+@state = global %struct.hackstate zeroinitializer, align 16
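+
+; The getters below load the containing quadword (lqa/lqd) and rotate each
+; field into its preferred slot; the setters use cbd/chd/cwd + shufb to insert
+; the scalar into the quadword before storing it back (see the grep counts
+; above).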
+
+define i8 @get_hackstate_c1() zeroext nounwind {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c2() zeroext nounwind {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c3() zeroext nounwind {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i1() nounwind {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret i32 %tmp2
+}
+
+define i16 @get_hackstate_s1() signext nounwind {
+entry:
+ %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret i16 %tmp2
+}
+
+define i8 @get_hackstate_c6() zeroext nounwind {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c7() zeroext nounwind {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i3() nounwind {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16
+ ret i32 %tmp2
+}
+
+define i32 @get_hackstate_i6() nounwind {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
+ ret i32 %tmp2
+}
+
+define void @set_hackstate_c1(i8 zeroext %c) nounwind {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret void
+}
+
+define void @set_hackstate_c2(i8 zeroext %c) nounwind {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret void
+}
+
+define void @set_hackstate_c3(i8 zeroext %c) nounwind {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret void
+}
+
+define void @set_hackstate_i1(i32 %i) nounwind {
+entry:
+ store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret void
+}
+
+define void @set_hackstate_s1(i16 signext %s) nounwind {
+entry:
+ store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret void
+}
+
+define void @set_hackstate_i3(i32 %i) nounwind {
+entry:
+ store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16
+ ret void
+}
+
+define void @set_hackstate_i6(i32 %i) nounwind {
+entry:
+ store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
+ ret void
+}
diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll
new file mode 100644
index 000000000000..db22564f4341
--- /dev/null
+++ b/test/CodeGen/CellSPU/trunc.ll
@@ -0,0 +1,94 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep shufb %t1.s | count 19
+; RUN: grep {ilhu.*1799} %t1.s | count 1
+; RUN: grep {ilhu.*771} %t1.s | count 2
+; RUN: grep {ilhu.*1543} %t1.s | count 1
+; RUN: grep {ilhu.*1029} %t1.s | count 1
+; RUN: grep {ilhu.*515} %t1.s | count 1
+; RUN: grep {ilhu.*3855} %t1.s | count 1
+; RUN: grep {ilhu.*3599} %t1.s | count 1
+; RUN: grep {ilhu.*3085} %t1.s | count 1
+; RUN: grep {iohl.*3855} %t1.s | count 1
+; RUN: grep {iohl.*3599} %t1.s | count 2
+; RUN: grep {iohl.*1543} %t1.s | count 2
+; RUN: grep {iohl.*771} %t1.s | count 2
+; RUN: grep {iohl.*515} %t1.s | count 1
+; RUN: grep {iohl.*1799} %t1.s | count 1
+; RUN: grep lqa %t1.s | count 1
+; RUN: grep cbd %t1.s | count 4
+; RUN: grep chd %t1.s | count 3
+; RUN: grep cwd %t1.s | count 1
+; RUN: grep cdd %t1.s | count 1
+
+; ModuleID = 'trunc.bc'
+target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
+target triple = "spu"
+
+define <16 x i8> @trunc_i128_i8(i128 %u, <16 x i8> %v) {
+entry:
+ %0 = trunc i128 %u to i8
+ %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 15
+ ret <16 x i8> %tmp1
+}
+
+define <8 x i16> @trunc_i128_i16(i128 %u, <8 x i16> %v) {
+entry:
+ %0 = trunc i128 %u to i16
+ %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 8
+ ret <8 x i16> %tmp1
+}
+
+define <4 x i32> @trunc_i128_i32(i128 %u, <4 x i32> %v) {
+entry:
+ %0 = trunc i128 %u to i32
+ %tmp1 = insertelement <4 x i32> %v, i32 %0, i32 2
+ ret <4 x i32> %tmp1
+}
+
+define <2 x i64> @trunc_i128_i64(i128 %u, <2 x i64> %v) {
+entry:
+ %0 = trunc i128 %u to i64
+ %tmp1 = insertelement <2 x i64> %v, i64 %0, i32 1
+ ret <2 x i64> %tmp1
+}
+
+define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) {
+entry:
+ %0 = trunc i64 %u to i8
+ %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10
+ ret <16 x i8> %tmp1
+}
+
+define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) {
+entry:
+ %0 = trunc i64 %u to i16
+ %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6
+ ret <8 x i16> %tmp1
+}
+
+define i32 @trunc_i64_i32(i64 %u) {
+entry:
+ %0 = trunc i64 %u to i32
+ ret i32 %0
+}
+
+define <16 x i8> @trunc_i32_i8(i32 %u, <16 x i8> %v) {
+entry:
+ %0 = trunc i32 %u to i8
+ %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 7
+ ret <16 x i8> %tmp1
+}
+
+define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) {
+entry:
+ %0 = trunc i32 %u to i16
+ %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3
+ ret <8 x i16> %tmp1
+}
+
+define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) {
+entry:
+ %0 = trunc i16 %u to i8
+ %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5
+ ret <16 x i8> %tmp1
+}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/README.txt b/test/CodeGen/CellSPU/useful-harnesses/README.txt
new file mode 100644
index 000000000000..d87b3989e4f7
--- /dev/null
+++ b/test/CodeGen/CellSPU/useful-harnesses/README.txt
@@ -0,0 +1,5 @@
+This directory contains code that is not part of the DejaGNU test suite, but
+is generally useful as standalone test harnesses.
+
+vecoperations.c: Various vector operation sanity checks, e.g., shuffles,
+ 8-bit vector add and multiply.
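+
+i32operations.c: i32 comparison and compare-and-select sanity checks.
+
+i64operations.c: i64 comparison and compare-and-select, sign/zero extension,
+                 shift, and multiply sanity checks (uses i64operations.h).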
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
new file mode 100644
index 000000000000..12fc30bf65d7
--- /dev/null
+++ b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
@@ -0,0 +1,69 @@
+#include <stdio.h>
+
+typedef unsigned int uint32_t;
+typedef int int32_t;
+
+const char *boolstring(int val) {
+ return val ? "true" : "false";
+}
+
+int i32_eq(int32_t a, int32_t b) {
+ return (a == b);
+}
+
+int i32_neq(int32_t a, int32_t b) {
+ return (a != b);
+}
+
+int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) {
+ return ((a == b) ? c : d);
+}
+
+int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) {
+ return ((a != b) ? c : d);
+}
+
+struct pred_s {
+ const char *name;
+ int (*predfunc)(int32_t, int32_t);
+ int (*selfunc)(int32_t, int32_t, int32_t, int32_t);
+};
+
+struct pred_s preds[] = {
+ { "eq", i32_eq, i32_eq_select },
+ { "neq", i32_neq, i32_neq_select }
+};
+
+int main(void) {
+ int i;
+ int32_t a = 1234567890;
+ int32_t b = 345678901;
+ int32_t c = 1234500000;
+ int32_t d = 10001;
+ int32_t e = 10000;
+
+ printf("a = %12d (0x%08x)\n", a, a);
+ printf("b = %12d (0x%08x)\n", b, b);
+ printf("c = %12d (0x%08x)\n", c, c);
+ printf("d = %12d (0x%08x)\n", d, d);
+ printf("e = %12d (0x%08x)\n", e, e);
+ printf("----------------------------------------\n");
+
+ for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) {
+ printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));
+ printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));
+ printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b)));
+ printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c)));
+ printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e)));
+ printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e)));
+
+ printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d));
+ printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c));
+ printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d));
+ printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d));
+
+ printf("----------------------------------------\n");
+ }
+
+ return 0;
+}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
new file mode 100644
index 000000000000..b613bd872e28
--- /dev/null
+++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
@@ -0,0 +1,673 @@
+#include <stdio.h>
+#include "i64operations.h"
+
+int64_t tval_a = 1234567890003LL;
+int64_t tval_b = 2345678901235LL;
+int64_t tval_c = 1234567890001LL;
+int64_t tval_d = 10001LL;
+int64_t tval_e = 10000LL;
+uint64_t tval_f = 0xffffff0750135eb9;
+int64_t tval_g = -1;
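+
+/* tval_a and tval_c share the same high 32 bits and differ only in the low
+   32 bits, so the 64-bit comparisons below cannot be decided by the high
+   word alone. */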
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
+int
+i64_eq(int64_t a, int64_t b)
+{
+ return (a == b);
+}
+
+int
+i64_neq(int64_t a, int64_t b)
+{
+ return (a != b);
+}
+
+int
+i64_gt(int64_t a, int64_t b)
+{
+ return (a > b);
+}
+
+int
+i64_le(int64_t a, int64_t b)
+{
+ return (a <= b);
+}
+
+int
+i64_ge(int64_t a, int64_t b) {
+ return (a >= b);
+}
+
+int
+i64_lt(int64_t a, int64_t b) {
+ return (a < b);
+}
+
+int
+i64_uge(uint64_t a, uint64_t b)
+{
+ return (a >= b);
+}
+
+int
+i64_ult(uint64_t a, uint64_t b)
+{
+ return (a < b);
+}
+
+int
+i64_ugt(uint64_t a, uint64_t b)
+{
+ return (a > b);
+}
+
+int
+i64_ule(uint64_t a, uint64_t b)
+{
+ return (a <= b);
+}
+
+int64_t
+i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d)
+{
+ return ((a == b) ? c : d);
+}
+
+int64_t
+i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d)
+{
+ return ((a != b) ? c : d);
+}
+
+int64_t
+i64_gt_select(int64_t a, int64_t b, int64_t c, int64_t d) {
+ return ((a > b) ? c : d);
+}
+
+int64_t
+i64_le_select(int64_t a, int64_t b, int64_t c, int64_t d) {
+ return ((a <= b) ? c : d);
+}
+
+int64_t
+i64_ge_select(int64_t a, int64_t b, int64_t c, int64_t d) {
+ return ((a >= b) ? c : d);
+}
+
+int64_t
+i64_lt_select(int64_t a, int64_t b, int64_t c, int64_t d) {
+ return ((a < b) ? c : d);
+}
+
+uint64_t
+i64_ugt_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
+{
+ return ((a > b) ? c : d);
+}
+
+uint64_t
+i64_ule_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
+{
+ return ((a <= b) ? c : d);
+}
+
+uint64_t
+i64_uge_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
+ return ((a >= b) ? c : d);
+}
+
+uint64_t
+i64_ult_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
+ return ((a < b) ? c : d);
+}
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
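+/* Each table entry is { fmt_string, &lhs, &rhs, &select_a, &select_b,
+   expected predicate result, &expected select result }, matching struct
+   harness_int64_pred in i64operations.h. */
+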
+struct harness_int64_pred int64_tests_eq[] = {
+ {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}
+};
+
+struct harness_int64_pred int64_tests_neq[] = {
+ {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}
+};
+
+struct harness_int64_pred int64_tests_sgt[] = {
+ {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}
+};
+
+struct harness_int64_pred int64_tests_sle[] = {
+ {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}
+};
+
+struct harness_int64_pred int64_tests_sge[] = {
+ {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}
+};
+
+struct harness_int64_pred int64_tests_slt[] = {
+ {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c},
+ {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d},
+ {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}
+};
+
+struct int64_pred_s int64_preds[] = {
+ {"eq", i64_eq, i64_eq_select,
+ int64_tests_eq, ARR_SIZE(int64_tests_eq)},
+ {"neq", i64_neq, i64_neq_select,
+ int64_tests_neq, ARR_SIZE(int64_tests_neq)},
+ {"gt", i64_gt, i64_gt_select,
+ int64_tests_sgt, ARR_SIZE(int64_tests_sgt)},
+ {"le", i64_le, i64_le_select,
+ int64_tests_sle, ARR_SIZE(int64_tests_sle)},
+ {"ge", i64_ge, i64_ge_select,
+ int64_tests_sge, ARR_SIZE(int64_tests_sge)},
+ {"lt", i64_lt, i64_lt_select,
+ int64_tests_slt, ARR_SIZE(int64_tests_slt)}
+};
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
+struct harness_uint64_pred uint64_tests_ugt[] = {
+ {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
+ {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d },
+ {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c },
+ {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c },
+ {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d }
+};
+
+struct harness_uint64_pred uint64_tests_ule[] = {
+ {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
+ {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
+ {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
+ {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
+ {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}
+};
+
+struct harness_uint64_pred uint64_tests_uge[] = {
+ {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
+ {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
+ {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
+ {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
+ {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}
+};
+
+struct harness_uint64_pred uint64_tests_ult[] = {
+ {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
+ {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
+ {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
+ {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
+ {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
+ (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}
+};
+
+struct uint64_pred_s uint64_preds[] = {
+ {"ugt", i64_ugt, i64_ugt_select,
+ uint64_tests_ugt, ARR_SIZE(uint64_tests_ugt)},
+ {"ule", i64_ule, i64_ule_select,
+ uint64_tests_ule, ARR_SIZE(uint64_tests_ule)},
+ {"uge", i64_uge, i64_uge_select,
+ uint64_tests_uge, ARR_SIZE(uint64_tests_uge)},
+ {"ult", i64_ult, i64_ult_select,
+ uint64_tests_ult, ARR_SIZE(uint64_tests_ult)}
+};
+
+int
+compare_expect_int64(const struct int64_pred_s * pred)
+{
+ int j, failed = 0;
+
+ for (j = 0; j < pred->n_tests; ++j) {
+ int pred_result;
+
+ pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs);
+
+ if (pred_result != pred->tests[j].expected) {
+ char str[64];
+
+ sprintf(str, pred->tests[j].fmt_string, pred->name);
+ printf("%s: returned value is %d, expecting %d\n", str,
+ pred_result, pred->tests[j].expected);
+ printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs,
+ *pred->tests[j].lhs);
+ printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs,
+ *pred->tests[j].rhs);
+ ++failed;
+ } else {
+ int64_t selresult;
+
+ selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs,
+ *pred->tests[j].select_a,
+ *pred->tests[j].select_b);
+
+ if (selresult != *pred->tests[j].select_expected) {
+ char str[64];
+
+ sprintf(str, pred->tests[j].fmt_string, pred->name);
+        printf("%s select: returned value is %lld, expecting %lld\n", str,
+               selresult, *pred->tests[j].select_expected);
+ printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs,
+ *pred->tests[j].lhs);
+ printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs,
+ *pred->tests[j].rhs);
+ printf(" true = %19lld (0x%016llx)\n", *pred->tests[j].select_a,
+ *pred->tests[j].select_a);
+ printf(" false = %19lld (0x%016llx)\n", *pred->tests[j].select_b,
+ *pred->tests[j].select_b);
+ ++failed;
+ }
+ }
+ }
+
+ printf(" %d tests performed, should be %d.\n", j, pred->n_tests);
+
+ return failed;
+}
+
+int
+compare_expect_uint64(const struct uint64_pred_s * pred)
+{
+ int j, failed = 0;
+
+ for (j = 0; j < pred->n_tests; ++j) {
+ int pred_result;
+
+ pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs);
+ if (pred_result != pred->tests[j].expected) {
+ char str[64];
+
+ sprintf(str, pred->tests[j].fmt_string, pred->name);
+ printf("%s: returned value is %d, expecting %d\n", str,
+ pred_result, pred->tests[j].expected);
+ printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs,
+ *pred->tests[j].lhs);
+ printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs,
+ *pred->tests[j].rhs);
+ ++failed;
+ } else {
+ uint64_t selresult;
+
+ selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs,
+ *pred->tests[j].select_a,
+ *pred->tests[j].select_b);
+ if (selresult != *pred->tests[j].select_expected) {
+ char str[64];
+
+ sprintf(str, pred->tests[j].fmt_string, pred->name);
+        printf("%s select: returned value is %llu, expecting %llu\n", str,
+               selresult, *pred->tests[j].select_expected);
+ printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs,
+ *pred->tests[j].lhs);
+ printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs,
+ *pred->tests[j].rhs);
+ printf(" true = %19llu (0x%016llx)\n", *pred->tests[j].select_a,
+ *pred->tests[j].select_a);
+ printf(" false = %19llu (0x%016llx)\n", *pred->tests[j].select_b,
+ *pred->tests[j].select_b);
+ ++failed;
+ }
+ }
+ }
+
+ printf(" %d tests performed, should be %d.\n", j, pred->n_tests);
+
+ return failed;
+}
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
+int
+test_i64_sext_i32(int in, int64_t expected) {
+ int64_t result = (int64_t) in;
+
+ if (result != expected) {
+    printf("i64_sext_i32(%d) returns %lld, expected %lld\n",
+           in, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+test_i64_sext_i16(short in, int64_t expected) {
+ int64_t result = (int64_t) in;
+
+ if (result != expected) {
+    printf("i64_sext_i16(%hd) returns %lld, expected %lld\n",
+           in, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+test_i64_sext_i8(signed char in, int64_t expected) {
+ int64_t result = (int64_t) in;
+
+ if (result != expected) {
+    printf("i64_sext_i8(%d) returns %lld, expected %lld\n",
+           in, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+test_i64_zext_i32(unsigned int in, uint64_t expected) {
+ uint64_t result = (uint64_t) in;
+
+ if (result != expected) {
+    printf("i64_zext_i32(%u) returns %llu, expected %llu\n",
+           in, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+test_i64_zext_i16(unsigned short in, uint64_t expected) {
+ uint64_t result = (uint64_t) in;
+
+ if (result != expected) {
+    printf("i64_zext_i16(%hu) returns %llu, expected %llu\n",
+           in, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+test_i64_zext_i8(unsigned char in, uint64_t expected) {
+ uint64_t result = (uint64_t) in;
+
+ if (result != expected) {
+    printf("i64_zext_i8(%u) returns %llu, expected %llu\n",
+           in, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
+int64_t
+i64_shl_const(int64_t a) {
+ return a << 10;
+}
+
+int64_t
+i64_shl(int64_t a, int amt) {
+ return a << amt;
+}
+
+uint64_t
+u64_shl_const(uint64_t a) {
+ return a << 10;
+}
+
+uint64_t
+u64_shl(uint64_t a, int amt) {
+ return a << amt;
+}
+
+int64_t
+i64_srl_const(int64_t a) {
+ return a >> 10;
+}
+
+int64_t
+i64_srl(int64_t a, int amt) {
+ return a >> amt;
+}
+
+uint64_t
+u64_srl_const(uint64_t a) {
+ return a >> 10;
+}
+
+uint64_t
+u64_srl(uint64_t a, int amt) {
+ return a >> amt;
+}
+
+int64_t
+i64_sra_const(int64_t a) {
+ return a >> 10;
+}
+
+int64_t
+i64_sra(int64_t a, int amt) {
+ return a >> amt;
+}
+
+uint64_t
+u64_sra_const(uint64_t a) {
+ return a >> 10;
+}
+
+uint64_t
+u64_sra(uint64_t a, int amt) {
+ return a >> amt;
+}
+
+int
+test_u64_constant_shift(const char *func_name, uint64_t (*func)(uint64_t), uint64_t a, uint64_t expected) {
+ uint64_t result = (*func)(a);
+
+ if (result != expected) {
+ printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+test_i64_constant_shift(const char *func_name, int64_t (*func)(int64_t), int64_t a, int64_t expected) {
+ int64_t result = (*func)(a);
+
+ if (result != expected) {
+ printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+test_u64_variable_shift(const char *func_name, uint64_t (*func)(uint64_t, int), uint64_t a, unsigned int b, uint64_t expected) {
+ uint64_t result = (*func)(a, b);
+
+ if (result != expected) {
+ printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), int64_t a, unsigned int b, int64_t expected) {
+ int64_t result = (*func)(a, b);
+
+ if (result != expected) {
+ printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
+int64_t i64_mul(int64_t a, int64_t b) {
+ return a * b;
+}
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
+int
+main(void)
+{
+ int i, j, failed = 0;
+ const char *something_failed = " %d tests failed.\n";
+ const char *all_tests_passed = " All tests passed.\n";
+
+ printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a);
+ printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b);
+ printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c);
+ printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d);
+ printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e);
+ printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f);
+  printf("tval_g = %20lld (0x%016llx)\n", tval_g, tval_g);
+ printf("----------------------------------------\n");
+
+ for (i = 0; i < ARR_SIZE(int64_preds); ++i) {
+ printf("%s series:\n", int64_preds[i].name);
+ if ((failed = compare_expect_int64(int64_preds + i)) > 0) {
+ printf(something_failed, failed);
+ } else {
+ printf(all_tests_passed);
+ }
+
+ printf("----------------------------------------\n");
+ }
+
+ for (i = 0; i < ARR_SIZE(uint64_preds); ++i) {
+ printf("%s series:\n", uint64_preds[i].name);
+ if ((failed = compare_expect_uint64(uint64_preds + i)) > 0) {
+ printf(something_failed, failed);
+ } else {
+ printf(all_tests_passed);
+ }
+
+ printf("----------------------------------------\n");
+ }
+
+ /*----------------------------------------------------------------------*/
+
+ puts("signed/zero-extend tests:");
+
+ failed = 0;
+ failed += test_i64_sext_i32(-1, -1LL);
+ failed += test_i64_sext_i32(10, 10LL);
+ failed += test_i64_sext_i32(0x7fffffff, 0x7fffffffLL);
+ failed += test_i64_sext_i16(-1, -1LL);
+ failed += test_i64_sext_i16(10, 10LL);
+ failed += test_i64_sext_i16(0x7fff, 0x7fffLL);
+ failed += test_i64_sext_i8(-1, -1LL);
+ failed += test_i64_sext_i8(10, 10LL);
+ failed += test_i64_sext_i8(0x7f, 0x7fLL);
+
+ failed += test_i64_zext_i32(0xffffffff, 0x00000000ffffffffLLU);
+ failed += test_i64_zext_i32(0x01234567, 0x0000000001234567LLU);
+ failed += test_i64_zext_i16(0xffff, 0x000000000000ffffLLU);
+ failed += test_i64_zext_i16(0x569a, 0x000000000000569aLLU);
+ failed += test_i64_zext_i8(0xff, 0x00000000000000ffLLU);
+ failed += test_i64_zext_i8(0xa0, 0x00000000000000a0LLU);
+
+ if (failed > 0) {
+ printf(" %d tests failed.\n", failed);
+ } else {
+ printf(" All tests passed.\n");
+ }
+
+ printf("----------------------------------------\n");
+
+ failed = 0;
+ puts("signed left/right shift tests:");
+ failed += test_i64_constant_shift("i64_shl_const", i64_shl_const, tval_a, 0x00047dc7ec114c00LL);
+ failed += test_i64_variable_shift("i64_shl", i64_shl, tval_a, 10, 0x00047dc7ec114c00LL);
+ failed += test_i64_constant_shift("i64_srl_const", i64_srl_const, tval_a, 0x0000000047dc7ec1LL);
+ failed += test_i64_variable_shift("i64_srl", i64_srl, tval_a, 10, 0x0000000047dc7ec1LL);
+ failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_a, 0x0000000047dc7ec1LL);
+ failed += test_i64_variable_shift("i64_sra", i64_sra, tval_a, 10, 0x0000000047dc7ec1LL);
+
+ if (failed > 0) {
+    printf("  %d tests failed.\n", failed);
+ } else {
+ printf(" All tests passed.\n");
+ }
+
+ printf("----------------------------------------\n");
+
+ failed = 0;
+ puts("unsigned left/right shift tests:");
+ failed += test_u64_constant_shift("u64_shl_const", u64_shl_const, tval_f, 0xfffc1d404d7ae400LL);
+ failed += test_u64_variable_shift("u64_shl", u64_shl, tval_f, 10, 0xfffc1d404d7ae400LL);
+ failed += test_u64_constant_shift("u64_srl_const", u64_srl_const, tval_f, 0x003fffffc1d404d7LL);
+ failed += test_u64_variable_shift("u64_srl", u64_srl, tval_f, 10, 0x003fffffc1d404d7LL);
+ failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_f, 0xffffffffc1d404d7LL);
+ failed += test_i64_variable_shift("i64_sra", i64_sra, tval_f, 10, 0xffffffffc1d404d7LL);
+ failed += test_u64_constant_shift("u64_sra_const", u64_sra_const, tval_f, 0x003fffffc1d404d7LL);
+ failed += test_u64_variable_shift("u64_sra", u64_sra, tval_f, 10, 0x003fffffc1d404d7LL);
+
+ if (failed > 0) {
+    printf("  %d tests failed.\n", failed);
+ } else {
+ printf(" All tests passed.\n");
+ }
+
+ printf("----------------------------------------\n");
+
+ int64_t result;
+
+ result = i64_mul(tval_g, tval_g);
+ printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result);
+ result = i64_mul(tval_d, tval_e);
+ printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result);
+ /* 0xba7a664f13077c9 */
+ result = i64_mul(tval_a, tval_b);
+ printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result);
+
+ printf("----------------------------------------\n");
+
+ return 0;
+}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.h b/test/CodeGen/CellSPU/useful-harnesses/i64operations.h
new file mode 100644
index 000000000000..7a02794cd7e0
--- /dev/null
+++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.h
@@ -0,0 +1,43 @@
+#define TRUE_VAL (!0)
+#define FALSE_VAL 0
+#define ARR_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
+
+typedef unsigned long long int uint64_t;
+typedef long long int int64_t;
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+struct harness_int64_pred {
+ const char *fmt_string;
+ int64_t *lhs;
+ int64_t *rhs;
+ int64_t *select_a;
+ int64_t *select_b;
+ int expected;
+ int64_t *select_expected;
+};
+
+struct harness_uint64_pred {
+ const char *fmt_string;
+ uint64_t *lhs;
+ uint64_t *rhs;
+ uint64_t *select_a;
+ uint64_t *select_b;
+ int expected;
+ uint64_t *select_expected;
+};
+
+struct int64_pred_s {
+ const char *name;
+ int (*predfunc) (int64_t, int64_t);
+ int64_t (*selfunc) (int64_t, int64_t, int64_t, int64_t);
+ struct harness_int64_pred *tests;
+ int n_tests;
+};
+
+struct uint64_pred_s {
+ const char *name;
+ int (*predfunc) (uint64_t, uint64_t);
+ uint64_t (*selfunc) (uint64_t, uint64_t, uint64_t, uint64_t);
+ struct harness_uint64_pred *tests;
+ int n_tests;
+};
diff --git a/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c b/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c
new file mode 100644
index 000000000000..c4c86e37635d
--- /dev/null
+++ b/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c
@@ -0,0 +1,179 @@
+#include <stdio.h>
+
+typedef unsigned char v16i8 __attribute__((ext_vector_type(16)));
+typedef short v8i16 __attribute__((ext_vector_type(8)));
+typedef int v4i32 __attribute__((ext_vector_type(4)));
+typedef float v4f32 __attribute__((ext_vector_type(4)));
+typedef long long v2i64 __attribute__((ext_vector_type(2)));
+typedef double v2f64 __attribute__((ext_vector_type(2)));
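+
+/* Clang/OpenCL-style vector typedefs (ext_vector_type); the shuffle helpers
+   below rely on element and swizzle access such as a.x and a.yzwx. */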
+
+void print_v16i8(const char *str, const v16i8 v) {
+ union {
+ unsigned char elts[16];
+ v16i8 vec;
+ } tv;
+ tv.vec = v;
+ printf("%s = { %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, "
+ "%hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, "
+ "%hhu, %hhu }\n",
+ str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5],
+ tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11],
+ tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]);
+}
+
+void print_v16i8_hex(const char *str, const v16i8 v) {
+ union {
+ unsigned char elts[16];
+ v16i8 vec;
+ } tv;
+ tv.vec = v;
+ printf("%s = { 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, "
+ "0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, "
+ "0x%02hhx, 0x%02hhx }\n",
+ str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5],
+ tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11],
+ tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]);
+}
+
+void print_v8i16_hex(const char *str, v8i16 v) {
+ union {
+ short elts[8];
+ v8i16 vec;
+ } tv;
+ tv.vec = v;
+ printf("%s = { 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, "
+ "0x%04hx, 0x%04hx, 0x%04hx }\n",
+ str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4],
+ tv.elts[5], tv.elts[6], tv.elts[7]);
+}
+
+void print_v4i32(const char *str, v4i32 v) {
+ printf("%s = { %d, %d, %d, %d }\n", str, v.x, v.y, v.z, v.w);
+}
+
+void print_v4f32(const char *str, v4f32 v) {
+ printf("%s = { %f, %f, %f, %f }\n", str, v.x, v.y, v.z, v.w);
+}
+
+void print_v2i64(const char *str, v2i64 v) {
+ printf("%s = { %lld, %lld }\n", str, v.x, v.y);
+}
+
+void print_v2f64(const char *str, v2f64 v) {
+ printf("%s = { %g, %g }\n", str, v.x, v.y);
+}
+
+/*----------------------------------------------------------------------*/
+
+v16i8 v16i8_mpy(v16i8 v1, v16i8 v2) {
+ return v1 * v2;
+}
+
+v16i8 v16i8_add(v16i8 v1, v16i8 v2) {
+ return v1 + v2;
+}
+
+v4i32 v4i32_shuffle_1(v4i32 a) {
+ v4i32 c2 = a.yzwx;
+ return c2;
+}
+
+v4i32 v4i32_shuffle_2(v4i32 a) {
+ v4i32 c2 = a.zwxy;
+ return c2;
+}
+
+v4i32 v4i32_shuffle_3(v4i32 a) {
+ v4i32 c2 = a.wxyz;
+ return c2;
+}
+
+v4i32 v4i32_shuffle_4(v4i32 a) {
+ v4i32 c2 = a.xyzw;
+ return c2;
+}
+
+v4i32 v4i32_shuffle_5(v4i32 a) {
+ v4i32 c2 = a.xwzy;
+ return c2;
+}
+
+v4f32 v4f32_shuffle_1(v4f32 a) {
+ v4f32 c2 = a.yzwx;
+ return c2;
+}
+
+v4f32 v4f32_shuffle_2(v4f32 a) {
+ v4f32 c2 = a.zwxy;
+ return c2;
+}
+
+v4f32 v4f32_shuffle_3(v4f32 a) {
+ v4f32 c2 = a.wxyz;
+ return c2;
+}
+
+v4f32 v4f32_shuffle_4(v4f32 a) {
+ v4f32 c2 = a.xyzw;
+ return c2;
+}
+
+v4f32 v4f32_shuffle_5(v4f32 a) {
+ v4f32 c2 = a.xwzy;
+ return c2;
+}
+
+v2i64 v2i64_shuffle(v2i64 a) {
+ v2i64 c2 = a.yx;
+ return c2;
+}
+
+v2f64 v2f64_shuffle(v2f64 a) {
+ v2f64 c2 = a.yx;
+ return c2;
+}
+
+int main(void) {
+ v16i8 v00 = { 0xf4, 0xad, 0x01, 0xe9, 0x51, 0x78, 0xc1, 0x8a,
+ 0x94, 0x7c, 0x49, 0x6c, 0x21, 0x32, 0xb2, 0x04 };
+ v16i8 va0 = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10 };
+ v16i8 va1 = { 0x11, 0x83, 0x4b, 0x63, 0xff, 0x90, 0x32, 0xe5,
+ 0x5a, 0xaa, 0x20, 0x01, 0x0d, 0x15, 0x77, 0x05 };
+ v8i16 v01 = { 0x1a87, 0x0a14, 0x5014, 0xfff0,
+ 0xe194, 0x0184, 0x801e, 0x5940 };
+ v4i32 v1 = { 1, 2, 3, 4 };
+ v4f32 v2 = { 1.0, 2.0, 3.0, 4.0 };
+ v2i64 v3 = { 691043ll, 910301513ll };
+ v2f64 v4 = { 5.8e56, 9.103e-62 };
+
+ puts("---- vector tests start ----");
+
+ print_v16i8_hex("v00 ", v00);
+ print_v16i8_hex("va0 ", va0);
+ print_v16i8_hex("va1 ", va1);
+ print_v16i8_hex("va0 x va1 ", v16i8_mpy(va0, va1));
+ print_v16i8_hex("va0 + va1 ", v16i8_add(va0, va1));
+ print_v8i16_hex("v01 ", v01);
+
+ print_v4i32("v4i32_shuffle_1(1, 2, 3, 4)", v4i32_shuffle_1(v1));
+ print_v4i32("v4i32_shuffle_2(1, 2, 3, 4)", v4i32_shuffle_2(v1));
+ print_v4i32("v4i32_shuffle_3(1, 2, 3, 4)", v4i32_shuffle_3(v1));
+ print_v4i32("v4i32_shuffle_4(1, 2, 3, 4)", v4i32_shuffle_4(v1));
+ print_v4i32("v4i32_shuffle_5(1, 2, 3, 4)", v4i32_shuffle_5(v1));
+
+ print_v4f32("v4f32_shuffle_1(1, 2, 3, 4)", v4f32_shuffle_1(v2));
+ print_v4f32("v4f32_shuffle_2(1, 2, 3, 4)", v4f32_shuffle_2(v2));
+ print_v4f32("v4f32_shuffle_3(1, 2, 3, 4)", v4f32_shuffle_3(v2));
+ print_v4f32("v4f32_shuffle_4(1, 2, 3, 4)", v4f32_shuffle_4(v2));
+ print_v4f32("v4f32_shuffle_5(1, 2, 3, 4)", v4f32_shuffle_5(v2));
+
+ print_v2i64("v3 ", v3);
+ print_v2i64("v2i64_shuffle ", v2i64_shuffle(v3));
+ print_v2f64("v4 ", v4);
+ print_v2f64("v2f64_shuffle ", v2f64_shuffle(v4));
+
+ puts("---- vector tests end ----");
+
+ return 0;
+}
diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll
new file mode 100644
index 000000000000..4b29adc80921
--- /dev/null
+++ b/test/CodeGen/CellSPU/vec_const.ll
@@ -0,0 +1,154 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: grep -w il %t1.s | count 3
+; RUN: grep ilhu %t1.s | count 8
+; RUN: grep -w ilh %t1.s | count 5
+; RUN: grep iohl %t1.s | count 7
+; RUN: grep lqa %t1.s | count 6
+; RUN: grep 24672 %t1.s | count 2
+; RUN: grep 16429 %t1.s | count 1
+; RUN: grep 63572 %t1.s | count 1
+; RUN: grep 4660 %t1.s | count 1
+; RUN: grep 22136 %t1.s | count 1
+; RUN: grep 43981 %t1.s | count 1
+; RUN: grep 61202 %t1.s | count 1
+; RUN: grep 16393 %t1.s | count 1
+; RUN: grep 8699 %t1.s | count 1
+; RUN: grep 21572 %t1.s | count 1
+; RUN: grep 11544 %t1.s | count 1
+; RUN: grep 1311768467750121234 %t1.s | count 1
+; RUN: grep lqd %t2.s | count 6
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
+target triple = "spu-unknown-elf"
+
+; Vector constant load tests:
+
+; IL <reg>, 2
+define <4 x i32> @v4i32_constvec() {
+ ret <4 x i32> < i32 2, i32 2, i32 2, i32 2 >
+}
+
+; Spill to constant pool
+define <4 x i32> @v4i32_constpool() {
+ ret <4 x i32> < i32 2, i32 1, i32 1, i32 2 >
+}
+
+; Max negative range for IL
+define <4 x i32> @v4i32_constvec_2() {
+ ret <4 x i32> < i32 -32768, i32 -32768, i32 -32768, i32 -32768 >
+}
+
+; ILHU <reg>, 73 (0x49)
+; 4784128 = 0x490000
+define <4 x i32> @v4i32_constvec_3() {
+ ret <4 x i32> < i32 4784128, i32 4784128,
+ i32 4784128, i32 4784128 >
+}
+
+; ILHU <reg>, 61 (0x3d)
+; IOHL <reg>, 15395 (0x3c23)
+define <4 x i32> @v4i32_constvec_4() {
+ ret <4 x i32> < i32 4013091, i32 4013091,
+ i32 4013091, i32 4013091 >
+}
+
+; ILHU <reg>, 0x5050 (20560)
+; IOHL <reg>, 0x5050 (20560)
+; Tests whether we expand the size of the bit pattern properly, because
+; this could be interpreted as an i8 pattern (0x50)
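+; Worked out: 0x50505050 = (0x5050 << 16) | 0x5050 = (20560 << 16) | 20560 = 1347440720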
+define <4 x i32> @v4i32_constvec_5() {
+ ret <4 x i32> < i32 1347440720, i32 1347440720,
+ i32 1347440720, i32 1347440720 >
+}
+
+; ILH
+define <8 x i16> @v8i16_constvec_1() {
+ ret <8 x i16> < i16 32767, i16 32767, i16 32767, i16 32767,
+ i16 32767, i16 32767, i16 32767, i16 32767 >
+}
+
+; ILH
+define <8 x i16> @v8i16_constvec_2() {
+ ret <8 x i16> < i16 511, i16 511, i16 511, i16 511, i16 511,
+ i16 511, i16 511, i16 511 >
+}
+
+; ILH
+define <8 x i16> @v8i16_constvec_3() {
+ ret <8 x i16> < i16 -512, i16 -512, i16 -512, i16 -512, i16 -512,
+ i16 -512, i16 -512, i16 -512 >
+}
+
+; ILH <reg>, 24672 (0x6060)
+; Tests whether we expand the size of the bit pattern properly, because
+; this could be interpreted as an i8 pattern (0x60)
+define <8 x i16> @v8i16_constvec_4() {
+ ret <8 x i16> < i16 24672, i16 24672, i16 24672, i16 24672, i16 24672,
+ i16 24672, i16 24672, i16 24672 >
+}
+
+; ILH <reg>, 24672 (0x6060)
+; Tests whether we expand the size of the bit pattern properly, because
+; this is an i8 pattern but has to be expanded out to i16 to load it
+; properly into the vector register.
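+; Worked out: the i8 value 96 (0x60) splats to the halfword 0x6060 = 24672.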
+define <16 x i8> @v16i8_constvec_1() {
+ ret <16 x i8> < i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96,
+ i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96 >
+}
+
+define <4 x float> @v4f32_constvec_1() {
+entry:
+ ret <4 x float> < float 0x4005BF0A80000000,
+ float 0x4005BF0A80000000,
+ float 0x4005BF0A80000000,
+ float 0x4005BF0A80000000 >
+}
+
+define <4 x float> @v4f32_constvec_2() {
+entry:
+ ret <4 x float> < float 0.000000e+00,
+ float 0.000000e+00,
+ float 0.000000e+00,
+ float 0.000000e+00 >
+}
+
+
+define <4 x float> @v4f32_constvec_3() {
+entry:
+ ret <4 x float> < float 0x4005BF0A80000000,
+ float 0x3810000000000000,
+ float 0x47EFFFFFE0000000,
+ float 0x400921FB60000000 >
+}
+
+; 1311768467750121234 => 0x 12345678 abcdef12
+; HI32_hi: 4660
+; HI32_lo: 22136
+; LO32_hi: 43981
+; LO32_lo: 61202
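+; Worked out: 0x12345678 = (4660 << 16) | 22136 and 0xabcdef12 = (43981 << 16) | 61202,
+; i.e. the ILHU/IOHL immediate pairs grep'd for above.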
+define <2 x i64> @i64_constvec_1() {
+entry:
+ ret <2 x i64> < i64 1311768467750121234,
+ i64 1311768467750121234 >
+}
+
+define <2 x i64> @i64_constvec_2() {
+entry:
+ ret <2 x i64> < i64 1, i64 1311768467750121234 >
+}
+
+define <2 x double> @f64_constvec_1() {
+entry:
+ ret <2 x double> < double 0x400921fb54442d18,
+ double 0xbff6a09e667f3bcd >
+}
+
+; 0x400921fb 54442d18 ->
+; (ILHU 0x4009 [16393]/IOHL 0x21fb [ 8699])
+; (ILHU 0x5444 [21572]/IOHL 0x2d18 [11544])
+define <2 x double> @f64_constvec_2() {
+entry:
+ ret <2 x double> < double 0x400921fb54442d18,
+ double 0x400921fb54442d18 >
+}
diff --git a/test/CodeGen/CellSPU/vecinsert.ll b/test/CodeGen/CellSPU/vecinsert.ll
new file mode 100644
index 000000000000..6abbd9ac797d
--- /dev/null
+++ b/test/CodeGen/CellSPU/vecinsert.ll
@@ -0,0 +1,120 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep cbd %t1.s | count 5
+; RUN: grep chd %t1.s | count 5
+; RUN: grep cwd %t1.s | count 10
+; RUN: grep -w il %t1.s | count 5
+; RUN: grep -w ilh %t1.s | count 6
+; RUN: grep iohl %t1.s | count 1
+; RUN: grep ilhu %t1.s | count 4
+; RUN: grep shufb %t1.s | count 26
+; RUN: grep 17219 %t1.s | count 1
+; RUN: grep 22598 %t1.s | count 1
+; RUN: grep -- -39 %t1.s | count 1
+; RUN: grep 24 %t1.s | count 1
+; RUN: grep 1159 %t1.s | count 1
+; ModuleID = 'vecinsert.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
+target triple = "spu-unknown-elf"
+
+; 67 -> 0x43; as an 8-bit vector constant load it becomes the halfword pattern 0x4343 (17219)
+define <16 x i8> @test_v16i8(<16 x i8> %P, i8 %x) {
+entry:
+ %tmp1 = insertelement <16 x i8> %P, i8 %x, i32 10
+ %tmp1.1 = insertelement <16 x i8> %tmp1, i8 67, i32 7
+ %tmp1.2 = insertelement <16 x i8> %tmp1.1, i8 %x, i32 15
+ ret <16 x i8> %tmp1.2
+}
+
+; 22598 -> 0x5846
+define <8 x i16> @test_v8i16(<8 x i16> %P, i16 %x) {
+entry:
+ %tmp1 = insertelement <8 x i16> %P, i16 %x, i32 5
+ %tmp1.1 = insertelement <8 x i16> %tmp1, i16 22598, i32 7
+ %tmp1.2 = insertelement <8 x i16> %tmp1.1, i16 %x, i32 2
+ ret <8 x i16> %tmp1.2
+}
+
+; 1574023 -> 0x180487 (ILHU 24/IOHL 1159)
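+; Worked out: (24 << 16) | 1159 = 1572864 + 1159 = 1574023 (0x00180487).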
+define <4 x i32> @test_v4i32_1(<4 x i32> %P, i32 %x) {
+entry:
+ %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2
+ %tmp1.1 = insertelement <4 x i32> %tmp1, i32 1574023, i32 1
+ %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
+ ret <4 x i32> %tmp1.2
+}
+
+; Should generate IL for the load
+define <4 x i32> @test_v4i32_2(<4 x i32> %P, i32 %x) {
+entry:
+ %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2
+ %tmp1.1 = insertelement <4 x i32> %tmp1, i32 -39, i32 1
+ %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
+ ret <4 x i32> %tmp1.2
+}
+
+define void @variable_v16i8_1(<16 x i8>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <16 x i8>* %a, i32 %i
+ %tmp2 = load <16 x i8>* %arrayidx
+ %tmp3 = insertelement <16 x i8> %tmp2, i8 1, i32 1
+ %tmp8 = insertelement <16 x i8> %tmp3, i8 2, i32 11
+ store <16 x i8> %tmp8, <16 x i8>* %arrayidx
+ ret void
+}
+
+define void @variable_v8i16_1(<8 x i16>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <8 x i16>* %a, i32 %i
+ %tmp2 = load <8 x i16>* %arrayidx
+ %tmp3 = insertelement <8 x i16> %tmp2, i16 1, i32 1
+ %tmp8 = insertelement <8 x i16> %tmp3, i16 2, i32 6
+ store <8 x i16> %tmp8, <8 x i16>* %arrayidx
+ ret void
+}
+
+define void @variable_v4i32_1(<4 x i32>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <4 x i32>* %a, i32 %i
+ %tmp2 = load <4 x i32>* %arrayidx
+ %tmp3 = insertelement <4 x i32> %tmp2, i32 1, i32 1
+ %tmp8 = insertelement <4 x i32> %tmp3, i32 2, i32 2
+ store <4 x i32> %tmp8, <4 x i32>* %arrayidx
+ ret void
+}
+
+define void @variable_v4f32_1(<4 x float>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <4 x float>* %a, i32 %i
+ %tmp2 = load <4 x float>* %arrayidx
+ %tmp3 = insertelement <4 x float> %tmp2, float 1.000000e+00, i32 1
+ %tmp8 = insertelement <4 x float> %tmp3, float 2.000000e+00, i32 2
+ store <4 x float> %tmp8, <4 x float>* %arrayidx
+ ret void
+}
+
+define void @variable_v2i64_1(<2 x i64>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <2 x i64>* %a, i32 %i
+ %tmp2 = load <2 x i64>* %arrayidx
+ %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 0
+ store <2 x i64> %tmp3, <2 x i64>* %arrayidx
+ ret void
+}
+
+define void @variable_v2i64_2(<2 x i64>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <2 x i64>* %a, i32 %i
+ %tmp2 = load <2 x i64>* %arrayidx
+ %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 1
+ store <2 x i64> %tmp3, <2 x i64>* %arrayidx
+ ret void
+}
+
+define void @variable_v2f64_1(<2 x double>* %a, i32 %i) nounwind {
+entry:
+ %arrayidx = getelementptr <2 x double>* %a, i32 %i
+ %tmp2 = load <2 x double>* %arrayidx
+ %tmp3 = insertelement <2 x double> %tmp2, double 1.000000e+00, i32 1
+ store <2 x double> %tmp3, <2 x double>* %arrayidx
+ ret void
+}