vendor/llvm/llvm-release_350-r216957

author: Dimitry Andric <dim@FreeBSD.org> 2014-11-24 09:08:18 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2014-11-24 09:08:18 +0000
commit: 5ca98fd98791947eba83a1ed3f2c8191ef7afa6c (patch)
tree: f5944309621cee4fe0976be6f9ac619b7ebfc4c2 /test/CodeGen/Mips
parent: 68bcb7db193e4bc81430063148253d30a791023e (diff)
194 files changed, 11665 insertions, 1989 deletions
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index 3381143c761d7..8807d750e4992 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s 
-; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s 
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float   < %s | FileCheck %s 
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 < %s | FileCheck %s
 
 define signext i8 @A(i8 %e.0, i8 signext %sum)  nounwind {
 entry:
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index e274bc0e14f0d..3c1bb39b4340b 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32 < %s | FileCheck %s
 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
 
 %struct.DWstruct = type { i32, i32 }
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
index 2b2ee0fd7ad86..c3791dfc7ce68 100644
--- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -1,9 +1,9 @@
 ; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
 ; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
 
 define float @h() nounwind readnone {
 entry:
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 38d7b7e25592a..5c840775cf9e3 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -2,10 +2,14 @@
 ; RUN: FileCheck %s -check-prefix=STATIC-O32 
 ; RUN: llc < %s -march=mips -relocation-model=pic | \
 ; RUN: FileCheck %s -check-prefix=PIC-O32 
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips4 | \
+; RUN:     FileCheck %s -check-prefix=N64
+; RUN: llc < %s -march=mips64 -relocation-model=static -mcpu=mips4 | \
+; RUN:     FileCheck %s -check-prefix=N64
 ; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 | \
-; RUN: FileCheck %s -check-prefix=N64
+; RUN:     FileCheck %s -check-prefix=N64
 ; RUN: llc < %s -march=mips64 -relocation-model=static -mcpu=mips64 | \
-; RUN: FileCheck %s -check-prefix=N64
+; RUN:     FileCheck %s -check-prefix=N64
 
 define i32 @main() nounwind readnone {
 entry:
diff --git a/test/CodeGen/Mips/2013-11-18-fp64-const0.ll b/test/CodeGen/Mips/2013-11-18-fp64-const0.ll
index f8390d9a1ca75..6a210a0c76ced 100644
--- a/test/CodeGen/Mips/2013-11-18-fp64-const0.ll
+++ b/test/CodeGen/Mips/2013-11-18-fp64-const0.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=mips -mattr=-fp64 < %s | FileCheck -check-prefix=CHECK-FP32 %s
-; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck -check-prefix=CHECK-FP64 %s
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck -check-prefix=CHECK-FP64 %s
 
 ; This test case is a simplified version of an llvm-stress generated test with
 ; seed=3718491962.
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstore2.ll b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
new file mode 100644
index 0000000000000..f113a0eb1d542
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
@@ -0,0 +1,83 @@
+; ModuleID = 'loadstore2.c'
+target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
+target triple = "mips--linux-gnu"
+
+@c2 = common global i8 0, align 1
+@c1 = common global i8 0, align 1
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+
+@s2 = common global i16 0, align 2
+@s1 = common global i16 0, align 2
+@i2 = common global i32 0, align 4
+@i1 = common global i32 0, align 4
+@f2 = common global float 0.000000e+00, align 4
+@f1 = common global float 0.000000e+00, align 4
+@d2 = common global double 0.000000e+00, align 8
+@d1 = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @cfoo() #0 {
+entry:
+  %0 = load i8* @c2, align 1
+  store i8 %0, i8* @c1, align 1
+; CHECK-LABEL:	cfoo:
+; CHECK:	lbu	$[[REGc:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:	sb	$[[REGc]], 0(${{[0-9]+}})
+
+
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @sfoo() #0 {
+entry:
+  %0 = load i16* @s2, align 2
+  store i16 %0, i16* @s1, align 2
+; CHECK-LABEL:	sfoo:
+; CHECK:	lhu	$[[REGs:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:	sh	$[[REGs]], 0(${{[0-9]+}})
+
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ifoo() #0 {
+entry:
+  %0 = load i32* @i2, align 4
+  store i32 %0, i32* @i1, align 4
+; CHECK-LABEL:	ifoo:
+; CHECK:	lw	$[[REGi:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:	sw	$[[REGi]], 0(${{[0-9]+}})
+
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ffoo() #0 {
+entry:
+  %0 = load float* @f2, align 4
+  store float %0, float* @f1, align 4
+; CHECK-LABEL:	ffoo:
+; CHECK:	lwc1	$f[[REGf:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:	swc1	$f[[REGf]], 0(${{[0-9]+}})
+
+
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @dfoo() #0 {
+entry:
+  %0 = load double* @d2, align 8
+  store double %0, double* @d1, align 8
+; CHECK-LABEL:        dfoo:
+; CHECK:        ldc1    $f[[REGd:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:        sdc1    $f[[REGd]], 0(${{[0-9]+}})
+; CHECK:        .end    dfoo
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
diff --git a/test/CodeGen/Mips/Fast-ISel/nullvoid.ll b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
new file mode 100644
index 0000000000000..eeaff878bf549
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define void @foo() {
+entry:
+  ret void
+; CHECK: jr	$ra
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestore.ll b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
new file mode 100644
index 0000000000000..5d52481dfdf3c
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+
+@abcd = external global i32
+
+; Function Attrs: nounwind
+define void @foo()  {
+entry:
+  store i32 12345, i32* @abcd, align 4
+; CHECK: 	addiu	$[[REG1:[0-9]+]], $zero, 12345
+; CHECK: 	lw	$[[REG2:[0-9]+]], %got(abcd)(${{[0-9]+}})
+; CHECK: 	sw	$[[REG1]], 0($[[REG2]])
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
new file mode 100644
index 0000000000000..6759c01c774bc
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+
+@f = common global float 0.000000e+00, align 4
+@de = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @f1() #0 {
+entry:
+  store float 0x3FFA76C8C0000000, float* @f, align 4
+  ret void
+; CHECK:  .ent  f1
+; CHECK:  lui  $[[REG1:[0-9]+]], 16339
+; CHECK:  ori  $[[REG2:[0-9]+]], $[[REG1]], 46662
+; CHECK:  mtc1  $[[REG2]], $f[[REG3:[0-9]+]]
+; CHECK:  lw  $[[REG4:[0-9]+]], %got(f)(${{[0-9]+}})
+; CHECK:  swc1  $f[[REG3]], 0($[[REG4]])
+; CHECK:   .end  f1
+
+}
+
+; Function Attrs: nounwind
+define void @d1() #0 {
+entry:
+  store double 1.234567e+00, double* @de, align 8
+; CHECK:  .ent  d1
+; CHECK:  lui  $[[REG1a:[0-9]+]], 16371
+; CHECK:  ori  $[[REG2a:[0-9]+]], $[[REG1a]], 49353
+; CHECK:  lui  $[[REG1b:[0-9]+]], 21403
+; CHECK:  ori  $[[REG2b:[0-9]+]], $[[REG1b]], 34951
+; CHECK:  mtc1  $[[REG2b]], $f[[REG3:[0-9]+]]
+; CHECK:  mthc1  $[[REG2a]], $f[[REG3]]
+; CHECK:  sdc1  $f[[REG3]], 0(${{[0-9]+}})
+; CHECK:  .end  d1
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorei.ll b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
new file mode 100644
index 0000000000000..7d2c8e73c352e
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+
+@ijk = external global i32
+
+; Function Attrs: nounwind
+define void @si2_1() #0 {
+entry:
+  store i32 32767, i32* @ijk, align 4
+; CHECK:        .ent    si2_1
+; CHECK:        addiu   $[[REG1:[0-9]+]], $zero, 32767
+; CHECK:        lw      $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK:        sw      $[[REG1]], 0($[[REG2]])
+
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @si2_2() #0 {
+entry:
+  store i32 -32768, i32* @ijk, align 4
+; CHECK:        .ent    si2_2
+; CHECK:        addiu   $[[REG1:[0-9]+]], $zero, -32768
+; CHECK:        lw      $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK:        sw      $[[REG1]], 0($[[REG2]])
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ui2_1() #0 {
+entry:
+  store i32 65535, i32* @ijk, align 4
+; CHECK:        .ent    ui2_1
+; CHECK:        ori     $[[REG1:[0-9]+]], $zero, 65535
+; CHECK:        lw      $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK:        sw      $[[REG1]], 0($[[REG2]])
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ui4_1() #0 {
+entry:
+  store i32 983040, i32* @ijk, align 4
+; CHECK:        .ent    ui4_1
+; CHECK:        lui     $[[REG1:[0-9]+]], 15
+; CHECK:        lw      $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK:        sw      $[[REG1]], 0($[[REG2]])
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ui4_2() #0 {
+entry:
+  store i32 719566, i32* @ijk, align 4
+; CHECK:        .ent    ui4_2
+; CHECK:        lui	$[[REG1:[0-9]+]], 10
+; CHECK: 	ori	$[[REG1]], $[[REG1]], 64206
+; CHECK: 	lw	$[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK: 	sw	$[[REG1]], 0($[[REG2]])
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
diff --git a/test/CodeGen/Mips/abicalls.ll b/test/CodeGen/Mips/abicalls.ll
new file mode 100644
index 0000000000000..6fa33aa158ad6
--- /dev/null
+++ b/test/CodeGen/Mips/abicalls.ll
@@ -0,0 +1,16 @@
+; 
+; When the assembler is ready a .s file for it will
+; be created.
+
+; Note that EF_MIPS_CPIC is set by -mabicalls which is the default on Linux
+; TODO need to support -mno-abicalls
+
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-STATIC %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=CHECK-PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
+
+; CHECK-STATIC: .abicalls
+; CHECK-STATIC-NEXT: pic0
+; CHECK-PIC: .abicalls
+; CHECK-PIC-NOT: pic0
diff --git a/test/CodeGen/Mips/abiflags-xx.ll b/test/CodeGen/Mips/abiflags-xx.ll
new file mode 100644
index 0000000000000..c4610120fdd56
--- /dev/null
+++ b/test/CodeGen/Mips/abiflags-xx.ll
@@ -0,0 +1,5 @@
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr=fpxx %s -o - | FileCheck %s
+
+; CHECK: .nan    legacy
+; CHECK: .module fp=xx
+
diff --git a/test/CodeGen/Mips/abiflags32.ll b/test/CodeGen/Mips/abiflags32.ll
new file mode 100644
index 0000000000000..e32d4a586ee3d
--- /dev/null
+++ b/test/CodeGen/Mips/abiflags32.ll
@@ -0,0 +1,17 @@
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr=fp64 %s -o - | FileCheck  -check-prefix=CHECK-64 %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips64 -mattr=-n64,n32 %s -o - | FileCheck  -check-prefix=CHECK-64n %s
+
+; CHECK: .nan    legacy
+; We don't emit '.module fp=32' for compatibility with binutils 2.24 which
+; doesn't accept .module.
+; CHECK-NOT: .module fp=32
+
+; CHECK-64: .nan    legacy
+; We do emit '.module fp=64' though since it contradicts the default value.
+; CHECK-64: .module fp=64
+
+; CHECK-64n: .nan    legacy
+; We don't emit '.module fp=64' for compatibility with binutils 2.24 which
+; doesn't accept .module.
+; CHECK-64n-NOT: .module fp=64
diff --git a/test/CodeGen/Mips/addi.ll b/test/CodeGen/Mips/addi.ll
index 8f70a469c44f8..01d409e521d77 100644
--- a/test/CodeGen/Mips/addi.ll
+++ b/test/CodeGen/Mips/addi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=16
 
 @i = global i32 6, align 4
 @j = global i32 12, align 4
diff --git a/test/CodeGen/Mips/align16.ll b/test/CodeGen/Mips/align16.ll
index 267cff54291d0..689ae8307f572 100644
--- a/test/CodeGen/Mips/align16.ll
+++ b/test/CodeGen/Mips/align16.ll
@@ -25,7 +25,7 @@ entry:
   call void @p(i32* %arrayidx1)
   ret void
 }
-; 16:	save	$ra, $s0, $s1, $s2, 2040
-; 16:	addiu	$sp, -56 # 16 bit inst
-; 16:	addiu	$sp, 56 # 16 bit inst
-; 16:	restore	$ra,  $s0, $s1, $s2, 2040
+; 16:	save	$ra, 2040
+; 16:	addiu	$sp, -40 # 16 bit inst
+; 16:	addiu	$sp, 40 # 16 bit inst
+; 16:	restore	$ra, 2040
diff --git a/test/CodeGen/Mips/alloca16.ll b/test/CodeGen/Mips/alloca16.ll
index 017665f00bd46..4f6059878c3bd 100644
--- a/test/CodeGen/Mips/alloca16.ll
+++ b/test/CodeGen/Mips/alloca16.ll
@@ -19,8 +19,8 @@ entry:
 
 define void @test() nounwind {
 entry:
-; 16: 	.frame	$sp,24,$ra
-; 16: 	save 	$ra, $s0, $s1, $s2, 24
+; 16: 	.frame	$sp,8,$ra
+; 16: 	save 	8 # 16 bit inst
 ; 16: 	move	$16, $sp
 ; 16:	move	${{[0-9]+}}, $sp
 ; 16:	subu	$[[REGISTER:[0-9]+]], ${{[0-9]+}}, ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll
index 8ec5d93139940..4b5d09778d798 100644
--- a/test/CodeGen/Mips/analyzebranch.ll
+++ b/test/CodeGen/Mips/analyzebranch.ll
@@ -1,9 +1,25 @@
-; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR -check-prefix=32-GPR
+; RUN: llc -march=mips64 -mcpu=mips4    < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips64 -mcpu=mips64   < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips64 -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips64 -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR -check-prefix=64-GPR
 
 define double @foo(double %a, double %b) nounwind readnone {
 entry:
-; CHECK: bc1f $BB
-; CHECK: nop
+; ALL-LABEL: foo:
+
+; FCC:           bc1f $BB
+; FCC:           nop
+
+; 32-GPR:        mtc1      $zero, $[[Z:f[0-9]]]
+; 32-GPR:        mthc1     $zero, $[[Z:f[0-9]]]
+; 64-GPR:        dmtc1     $zero, $[[Z:f[0-9]]]
+; GPR:           cmp.lt.d  $[[FGRCC:f[0-9]+]], $[[Z]], $f12
+; GPR:           mfc1      $[[GPRCC:[0-9]+]], $[[FGRCC]]
+; GPR-NOT:       not       $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez      $[[GPRCC]], $BB
 
   %cmp = fcmp ogt double %a, 0.000000e+00
   br i1 %cmp, label %if.end6, label %if.else
@@ -25,8 +41,17 @@ return:                                           ; preds = %if.else, %if.end6
 
 define void @f1(float %f) nounwind {
 entry:
-; CHECK: bc1f $BB
-; CHECK: nop
+; ALL-LABEL: f1:
+
+; FCC:           bc1f $BB
+; FCC:           nop
+
+; GPR:           mtc1     $zero, $[[Z:f[0-9]]]
+; GPR:           cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $[[Z]]
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           beqz     $[[GPRCC]], $BB
+
   %cmp = fcmp une float %f, 0.000000e+00
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index 0e60fe1fbfbc5..f4118ecec79d1 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -1,5 +1,14 @@
-; RUN: llc -march=mipsel --disable-machine-licm < %s | FileCheck %s -check-prefix=CHECK-EL
-; RUN: llc -march=mips   --disable-machine-licm < %s | FileCheck %s -check-prefix=CHECK-EB
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips4    < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
+
+; Keep one big-endian check so that we don't reduce testing, but don't add more
+; since endianness doesn't affect the body of the atomic operations.
+; RUN: llc -march=mips   --disable-machine-licm -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=NO-SEB-SEH -check-prefix=CHECK-EB
 
 @x = common global i32 0, align 4
 
@@ -8,21 +17,16 @@ entry:
   %0 = atomicrmw add i32* @x, i32 %incr monotonic
   ret i32 %0
 
-; CHECK-EL-LABEL:   AtomicLoadAdd32:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK-EL:   addu    $[[R2:[0-9]+]], $[[R1]], $4
-; CHECK-EL:   sc      $[[R2]], 0($[[R0]])
-; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
-
-; CHECK-EB-LABEL:   AtomicLoadAdd32:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK-EB:   addu    $[[R2:[0-9]+]], $[[R1]], $4
-; CHECK-EB:   sc      $[[R2]], 0($[[R0]])
-; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
+; ALL-LABEL: AtomicLoadAdd32:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)(
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R1:[0-9]+]], 0($[[R0]])
+; ALL:           addu    $[[R2:[0-9]+]], $[[R1]], $4
+; ALL:           sc      $[[R2]], 0($[[R0]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicLoadNand32(i32 %incr) nounwind {
@@ -30,23 +34,17 @@ entry:
   %0 = atomicrmw nand i32* @x, i32 %incr monotonic
   ret i32 %0
 
-; CHECK-EL-LABEL:   AtomicLoadNand32:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK-EL:   and     $[[R3:[0-9]+]], $[[R1]], $4
-; CHECK-EL:   nor     $[[R2:[0-9]+]], $zero, $[[R3]]
-; CHECK-EL:   sc      $[[R2]], 0($[[R0]])
-; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
-
-; CHECK-EB-LABEL:   AtomicLoadNand32:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK-EB:   and     $[[R3:[0-9]+]], $[[R1]], $4
-; CHECK-EB:   nor     $[[R2:[0-9]+]], $zero, $[[R3]]
-; CHECK-EB:   sc      $[[R2]], 0($[[R0]])
-; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
+; ALL-LABEL: AtomicLoadNand32:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)(
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R1:[0-9]+]], 0($[[R0]])
+; ALL:           and     $[[R3:[0-9]+]], $[[R1]], $4
+; ALL:           nor     $[[R2:[0-9]+]], $zero, $[[R3]]
+; ALL:           sc      $[[R2]], 0($[[R0]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicSwap32(i32 %newval) nounwind {
@@ -57,19 +55,15 @@ entry:
   %0 = atomicrmw xchg i32* @x, i32 %tmp monotonic
   ret i32 %0
 
-; CHECK-EL-LABEL:   AtomicSwap32:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      ${{[0-9]+}}, 0($[[R0]])
-; CHECK-EL:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
-
-; CHECK-EB-LABEL:   AtomicSwap32:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      ${{[0-9]+}}, 0($[[R0]])
-; CHECK-EB:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
+; ALL-LABEL: AtomicSwap32:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      ${{[0-9]+}}, 0($[[R0]])
+; ALL:           sc      $[[R2:[0-9]+]], 0($[[R0]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
@@ -77,26 +71,21 @@ entry:
   %newval.addr = alloca i32, align 4
   store i32 %newval, i32* %newval.addr, align 4
   %tmp = load i32* %newval.addr, align 4
-  %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic
-  ret i32 %0
+  %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic monotonic
+  %1 = extractvalue { i32, i1 } %0, 0
+  ret i32 %1
+
+; ALL-LABEL: AtomicCmpSwap32:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)(
 
-; CHECK-EL-LABEL:   AtomicCmpSwap32:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $2, 0($[[R0]])
-; CHECK-EL:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
-; CHECK-EL:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
-; CHECK-EL:   $[[BB1]]:
-
-; CHECK-EB-LABEL:   AtomicCmpSwap32:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $2, 0($[[R0]])
-; CHECK-EB:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
-; CHECK-EB:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
-; CHECK-EB:   $[[BB1]]:
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $2, 0($[[R0]])
+; ALL:           bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
+; ALL:           sc      $[[R2:[0-9]+]], 0($[[R0]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
+; ALL:       $[[BB1]]:
 }
 
 
@@ -108,56 +97,38 @@ entry:
   %0 = atomicrmw add i8* @y, i8 %incr monotonic
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicLoadAdd8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EL:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicLoadAdd8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EB:   and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
-; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
-; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
-; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EB:   sra     $2, $[[R17]], 24
+; ALL-LABEL: AtomicLoadAdd8:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:           addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; ALL:           and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; ALL:           sc      $[[R14]], 0($[[R2]])
+; ALL:           beqz    $[[R14]], $[[BB0]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R17:[0-9]+]], $[[R16]], 24
+; NO-SEB-SEH:    sra     $2, $[[R17]], 24
+
+; HAS-SEB-SEH:   seb     $2, $[[R16]]
 }
 
 define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
@@ -165,56 +136,38 @@ entry:
   %0 = atomicrmw sub i8* @y, i8 %incr monotonic
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicLoadSub8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   sllv     $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   subu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EL:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicLoadSub8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; ALL-LABEL: AtomicLoadSub8:
+
+; MIPS32-ANY: lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY: ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:        addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:        and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:        andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:   sll     $[[R5:[0-9]+]], $[[R3]], 3
 ; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
 ; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   subu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EB:   and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
-; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
-; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
-; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EB:   sra     $2, $[[R17]], 24
+; ALL:        ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:        sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:        nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:        sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:    $[[BB0:[A-Z_0-9]+]]:
+; ALL:        ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:        subu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; ALL:        and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; ALL:        and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:        or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; ALL:        sc      $[[R14]], 0($[[R2]])
+; ALL:        beqz    $[[R14]], $[[BB0]]
+
+; ALL:        and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:        srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH: sll     $[[R17:[0-9]+]], $[[R16]], 24
+; NO-SEB-SEH: sra     $2, $[[R17]], 24
+
+; HAS-SEB-SEH:seb     $2, $[[R16]]
 }
 
 define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
@@ -222,58 +175,39 @@ entry:
   %0 = atomicrmw nand i8* @y, i8 %incr monotonic
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicLoadNand8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EL:   nor     $[[R11:[0-9]+]], $zero, $[[R18]]
-; CHECK-EL:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicLoadNand8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EB:   nor     $[[R11:[0-9]+]], $zero, $[[R18]]
-; CHECK-EB:   and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
-; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
-; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
-; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EB:   sra     $2, $[[R17]], 24
+; ALL-LABEL: AtomicLoadNand8:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:           and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
+; ALL:           nor     $[[R11:[0-9]+]], $zero, $[[R18]]
+; ALL:           and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; ALL:           sc      $[[R14]], 0($[[R2]])
+; ALL:           beqz    $[[R14]], $[[BB0]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R17:[0-9]+]], $[[R16]], 24
+; NO-SEB-SEH:    sra     $2, $[[R17]], 24
+
+; HAS-SEB-SEH:   seb     $2, $[[R16]]
 }
 
 define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
@@ -281,121 +215,170 @@ entry:
   %0 = atomicrmw xchg i8* @y, i8 %newval monotonic
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicSwap8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   and     $[[R18:[0-9]+]], $[[R9]], $[[R6]]
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
-; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicSwap8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   and     $[[R18:[0-9]+]], $[[R9]], $[[R7]]
-; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
-; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
-; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
-; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EB:   sra     $2, $[[R17]], 24
+; ALL-LABEL: AtomicSwap8:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:           and     $[[R18:[0-9]+]], $[[R9]], $[[R7]]
+; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
+; ALL:           sc      $[[R14]], 0($[[R2]])
+; ALL:           beqz    $[[R14]], $[[BB0]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R17:[0-9]+]], $[[R16]], 24
+; NO-SEB-SEH:    sra     $2, $[[R17]], 24
+
+; HAS-SEB-SEH:   seb     $2, $[[R16]]
+
 }
 
 define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
 entry:
-  %0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic
+  %pair0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic monotonic
+  %0 = extractvalue { i8, i1 } %pair0, 0
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicCmpSwap8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   andi    $[[R8:[0-9]+]], $4, 255
-; CHECK-EL:   sllv    $[[R9:[0-9]+]], $[[R8]], $[[R4]]
-; CHECK-EL:   andi    $[[R10:[0-9]+]], $5, 255
-; CHECK-EL:   sllv    $[[R11:[0-9]+]], $[[R10]], $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R12:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R12]], $[[R6]]
-; CHECK-EL:   bne     $[[R13]], $[[R9]], $[[BB1:[A-Z_0-9]+]]
-
-; CHECK-EL:   and     $[[R14:[0-9]+]], $[[R12]], $[[R7]]
-; CHECK-EL:   or      $[[R15:[0-9]+]], $[[R14]], $[[R11]]
-; CHECK-EL:   sc      $[[R15]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R15]], $[[BB0]]
-
-; CHECK-EL:   $[[BB1]]:
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R13]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicCmpSwap8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   andi    $[[R9:[0-9]+]], $4, 255
-; CHECK-EB:   sllv    $[[R10:[0-9]+]], $[[R9]], $[[R5]]
-; CHECK-EB:   andi    $[[R11:[0-9]+]], $5, 255
-; CHECK-EB:   sllv    $[[R12:[0-9]+]], $[[R11]], $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R13:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   and     $[[R14:[0-9]+]], $[[R13]], $[[R7]]
-; CHECK-EB:   bne     $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R13]], $[[R8]]
-; CHECK-EB:   or      $[[R16:[0-9]+]], $[[R15]], $[[R12]]
-; CHECK-EB:   sc      $[[R16]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R16]], $[[BB0]]
-
-; CHECK-EB:   $[[BB1]]:
-; CHECK-EB:   srlv    $[[R17:[0-9]+]], $[[R14]], $[[R5]]
-; CHECK-EB:   sll     $[[R18:[0-9]+]], $[[R17]], 24
-; CHECK-EB:   sra     $2, $[[R18]], 24
+; ALL-LABEL: AtomicCmpSwap8:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           andi    $[[R9:[0-9]+]], $4, 255
+; ALL:           sllv    $[[R10:[0-9]+]], $[[R9]], $[[R5]]
+; ALL:           andi    $[[R11:[0-9]+]], $5, 255
+; ALL:           sllv    $[[R12:[0-9]+]], $[[R11]], $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R13:[0-9]+]], 0($[[R2]])
+; ALL:           and     $[[R14:[0-9]+]], $[[R13]], $[[R7]]
+; ALL:           bne     $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R13]], $[[R8]]
+; ALL:           or      $[[R16:[0-9]+]], $[[R15]], $[[R12]]
+; ALL:           sc      $[[R16]], 0($[[R2]])
+; ALL:           beqz    $[[R16]], $[[BB0]]
+
+; ALL:       $[[BB1]]:
+; ALL:           srlv    $[[R17:[0-9]+]], $[[R14]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R18:[0-9]+]], $[[R17]], 24
+; NO-SEB-SEH:    sra     $2, $[[R18]], 24
+
+; HAS-SEB-SEH:   seb     $2, $[[R17]]
 }
 
+define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 %oldval, i8 signext %newval) nounwind {
+entry:
+  %0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic
+  %1 = extractvalue { i8, i1 } %0, 1
+  ret i1 %1
+; ALL-LABEL: AtomicCmpSwapRes8
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $4, $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $4, 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           andi    $[[R9:[0-9]+]], $5, 255
+; ALL:           sllv    $[[R10:[0-9]+]], $[[R9]], $[[R5]]
+; ALL:           andi    $[[R11:[0-9]+]], $6, 255
+; ALL:           sllv    $[[R12:[0-9]+]], $[[R11]], $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R13:[0-9]+]], 0($[[R2]])
+; ALL:           and     $[[R14:[0-9]+]], $[[R13]], $[[R7]]
+; ALL:           bne     $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R13]], $[[R8]]
+; ALL:           or      $[[R16:[0-9]+]], $[[R15]], $[[R12]]
+; ALL:           sc      $[[R16]], 0($[[R2]])
+; ALL:           beqz    $[[R16]], $[[BB0]]
+
+; ALL:       $[[BB1]]:
+; ALL:           srlv    $[[R17:[0-9]+]], $[[R14]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R18:[0-9]+]], $[[R17]], 24
+; NO-SEB-SEH:    sra     $[[R19:[0-9]+]], $[[R18]], 24
+
+; HAS-SEB-SEH:   seb     $[[R19:[0-9]+]], $[[R17]]
+
+; ALL:           xor     $[[R20:[0-9]+]], $[[R19]], $5
+; ALL:           sltiu   $2, $[[R20]], 1
+}
+
+; Check one i16 so that we cover the seh sign extend
+@z = common global i16 0, align 1
+
+define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind {
+entry:
+  %0 = atomicrmw add i16* @z, i16 %incr monotonic
+  ret i16 %0
+
+; ALL-LABEL: AtomicLoadAdd16:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(z)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(z)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 2
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 65535
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:           addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; ALL:           and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; ALL:           sc      $[[R14]], 0($[[R2]])
+; ALL:           beqz    $[[R14]], $[[BB0]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R17:[0-9]+]], $[[R16]], 16
+; NO-SEB-SEH:    sra     $2, $[[R17]], 16
+
+; MIPS32R2:      seh     $2, $[[R16]]
+}
+
+
 @countsint = common global i32 0, align 4
 
 define i32 @CheckSync(i32 %v) nounwind noinline {
@@ -403,19 +386,13 @@ entry:
   %0 = atomicrmw add i32* @countsint, i32 %v seq_cst
   ret i32 %0 
 
-; CHECK-EL-LABEL:   CheckSync:
-; CHECK-EL:   sync 0
-; CHECK-EL:   ll
-; CHECK-EL:   sc
-; CHECK-EL:   beq
-; CHECK-EL:   sync 0
-
-; CHECK-EB-LABEL:   CheckSync:
-; CHECK-EB:   sync 0
-; CHECK-EB:   ll
-; CHECK-EB:   sc
-; CHECK-EB:   beq
-; CHECK-EB:   sync 0
+; ALL-LABEL: CheckSync:
+
+; ALL:           sync
+; ALL:           ll
+; ALL:           sc
+; ALL:           beq
+; ALL:           sync
 }
 
 ; make sure that this assertion in
@@ -429,8 +406,29 @@ entry:
 
 define i32 @zeroreg() nounwind {
 entry:
-  %0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst
+  %pair0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst seq_cst
+  %0 = extractvalue { i32, i1 } %pair0, 0
   %1 = icmp eq i32 %0, 1
   %conv = zext i1 %1 to i32
   ret i32 %conv
 }
+
+; Check that MIPS32R6 has the correct offset range.
+; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store.
+define i32 @AtomicLoadAdd32_OffGt9Bit(i32 %incr) nounwind {
+entry:
+  %0 = atomicrmw add i32* getelementptr(i32* @x, i32 256), i32 %incr monotonic
+  ret i32 %0
+
+; ALL-LABEL: AtomicLoadAdd32_OffGt9Bit:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)(
+
+; ALL:           addiu   $[[PTR:[0-9]+]], $[[R0]], 1024
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R1:[0-9]+]], 0($[[PTR]])
+; ALL:           addu    $[[R2:[0-9]+]], $[[R1]], $4
+; ALL:           sc      $[[R2]], 0($[[PTR]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
+}
diff --git a/test/CodeGen/Mips/atomicops.ll b/test/CodeGen/Mips/atomicops.ll
index 0f0f01afc1420..c26415233d0b2 100644
--- a/test/CodeGen/Mips/atomicops.ll
+++ b/test/CodeGen/Mips/atomicops.ll
@@ -20,7 +20,8 @@ entry:
   %add.i = add nsw i32 %0, 2
   %1 = load volatile i32* %x, align 4
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind
-  %2 = cmpxchg i32* %x, i32 1, i32 2 seq_cst
+  %pair = cmpxchg i32* %x, i32 1, i32 2 seq_cst seq_cst
+  %2 = extractvalue { i32, i1 } %pair, 0
   %3 = load volatile i32* %x, align 4
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind
   %4 = atomicrmw xchg i32* %x, i32 1 seq_cst
diff --git a/test/CodeGen/Mips/blez_bgez.ll b/test/CodeGen/Mips/blez_bgez.ll
index f6a5e4f47a5af..dcda047f8d094 100644
--- a/test/CodeGen/Mips/blez_bgez.ll
+++ b/test/CodeGen/Mips/blez_bgez.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+; RUN: llc -march=mips64el < %s | FileCheck %s
 
 ; CHECK-LABEL: test_blez:
 ; CHECK: blez ${{[0-9]+}}, $BB
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index beab65f47196a..d6dc7e7b24b0f 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -1,11 +1,11 @@
 ; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
 ; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static   < %s | FileCheck %s -check-prefix=STATIC-MIPS16-1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static   < %s | FileCheck %s -check-prefix=STATIC-MIPS16-2
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-2
 
 @reg = common global i8* null, align 4
 
@@ -43,8 +43,8 @@ entry:
 ; STATIC-MIPS16-1: li  $[[R1_16:[0-9]+]], %hi($tmp[[TI_16:[0-9]+]])
 ; STATIC-MIPS16-1: sll ${{[0-9]+}},  $[[R1_16]], 16
 ; STATIC-MIPS16-2: li  ${{[0-9]+}}, %lo($tmp{{[0-9]+}})
-; STATIC-MIPS16-1 jal	dummy
-; STATIC-MIPS16-2 jal	dummy
+; STATIC-MIPS16-1: jal	dummy
+; STATIC-MIPS16-2: jal	dummy
 
 define void @f() nounwind {
 entry:
diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll
index f17b91aab8024..812eef1377737 100644
--- a/test/CodeGen/Mips/bswap.ll
+++ b/test/CodeGen/Mips/bswap.ll
@@ -1,28 +1,105 @@
 ; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
-; RUN: llc  < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float   | FileCheck %s -check-prefix=mips16 
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 | FileCheck %s -check-prefix=MIPS16
 
 define i32 @bswap32(i32 %x) nounwind readnone {
 entry:
 ; MIPS32-LABEL: bswap32:
 ; MIPS32: wsbh $[[R0:[0-9]+]]
 ; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; mips16: .ent bswap32
+
+; MIPS64-LABEL: bswap32:
+; MIPS64: wsbh $[[R0:[0-9]+]]
+; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
+
+; MIPS16-LABEL: bswap32:
+; MIPS16-DAG: srl $[[R0:[0-9]+]], $4, 8
+; MIPS16-DAG: srl $[[R1:[0-9]+]], $4, 24
+; MIPS16-DAG: sll $[[R2:[0-9]+]], $4, 8
+; MIPS16-DAG: sll $[[R3:[0-9]+]], $4, 24
+; MIPS16-DAG: li  $[[R4:[0-9]+]], 65280
+; MIPS16-DAG: and $[[R4]], $[[R0]]
+; MIPS16-DAG: or  $[[R1]], $[[R4]]
+; MIPS16-DAG: lw  $[[R7:[0-9]+]], $CPI
+; MIPS16-DAG: and $[[R7]], $[[R2]]
+; MIPS16-DAG: or  $[[R3]], $[[R7]]
+; MIPS16-DAG: or  $[[R3]], $[[R1]]
+
   %or.3 = call i32 @llvm.bswap.i32(i32 %x)
   ret i32 %or.3
 }
 
 define i64 @bswap64(i64 %x) nounwind readnone {
 entry:
+; MIPS32-LABEL: bswap64:
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+
 ; MIPS64-LABEL: bswap64:
 ; MIPS64: dsbh $[[R0:[0-9]+]]
 ; MIPS64: dshd ${{[0-9]+}}, $[[R0]]
-; mips16: .ent bswap64
+
+; MIPS16-LABEL: bswap64:
+; MIPS16-DAG: srl $[[R0:[0-9]+]], $5, 8
+; MIPS16-DAG: srl $[[R1:[0-9]+]], $5, 24
+; MIPS16-DAG: sll $[[R2:[0-9]+]], $5, 8
+; MIPS16-DAG: sll $[[R3:[0-9]+]], $5, 24
+; MIPS16-DAG: li  $[[R4:[0-9]+]], 65280
+; MIPS16-DAG: and $[[R0]], $[[R4]]
+; MIPS16-DAG: or  $[[R1]], $[[R0]]
+; MIPS16-DAG: lw  $[[R7:[0-9]+]], 1f
+; MIPS16-DAG: and $[[R2]], $[[R7]]
+; MIPS16-DAG: or  $[[R3]], $[[R2]]
+; MIPS16-DAG: or  $[[R3]], $[[R1]]
+; MIPS16-DAG: srl $[[R0:[0-9]+]], $4, 8
+; MIPS16-DAG: srl $[[R1:[0-9]+]], $4, 24
+; MIPS16-DAG: sll $[[R2:[0-9]+]], $4, 8
+; MIPS16-DAG: sll $[[R3:[0-9]+]], $4, 24
+; MIPS16-DAG: li  $[[R4:[0-9]+]], 65280
+; MIPS16-DAG: and $[[R0]], $[[R4]]
+; MIPS16-DAG: or  $[[R1]], $[[R0]]
+; MIPS16-DAG: lw  $[[R7:[0-9]+]], 1f
+; MIPS16-DAG: and $[[R2]], $[[R7]]
+; MIPS16-DAG: or  $[[R3]], $[[R2]]
+; MIPS16-DAG: or  $[[R3]], $[[R1]]
+
   %or.7 = call i64 @llvm.bswap.i64(i64 %x)
   ret i64 %or.7
 }
 
+define <4 x i32> @bswapv4i32(<4 x i32> %x) nounwind readnone {
+entry:
+; MIPS32-LABEL: bswapv4i32:
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+
+; MIPS64-LABEL: bswapv4i32:
+; MIPS64: wsbh $[[R0:[0-9]+]]
+; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64: wsbh $[[R0:[0-9]+]]
+; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64: wsbh $[[R0:[0-9]+]]
+; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64: wsbh $[[R0:[0-9]+]]
+; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
+
+; Don't bother with a MIPS16 version. It's just bswap32 repeated four times and
+; would be very long
+
+  %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %x)
+  ret <4 x i32> %ret
+}
+
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 
 declare i64 @llvm.bswap.i64(i64) nounwind readnone
 
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/Mips/buildpairextractelementf64.ll b/test/CodeGen/Mips/buildpairextractelementf64.ll
index 490d4273c5b61..7682a98ace992 100644
--- a/test/CodeGen/Mips/buildpairextractelementf64.ll
+++ b/test/CodeGen/Mips/buildpairextractelementf64.ll
@@ -1,15 +1,19 @@
-; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=FP32
-; RUN: llc -march=mips  < %s | FileCheck %s -check-prefix=FP32
-; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64
-; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=NO-MFHC1 -check-prefix=ALL
+; RUN: llc -march=mips  < %s | FileCheck %s -check-prefix=NO-MFHC1 -check-prefix=ALL
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=HAS-MFHC1 -check-prefix=ALL
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=HAS-MFHC1 -check-prefix=ALL
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck %s -check-prefix=HAS-MFHC1 -check-prefix=ALL
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck %s -check-prefix=HAS-MFHC1 -check-prefix=ALL
 
 @a = external global i32
 
-; CHECK-LABEL: f:
-; FP32: mtc1
-; FP32: mtc1
-; FP64-DAG: mtc1
-; FP64-DAG: mthc1
+; ALL-LABEL: f:
+
+; NO-MFHC1: mtc1
+; NO-MFHC1: mtc1
+
+; HAS-MFHC1-DAG: mtc1
+; HAS-MFHC1-DAG: mthc1
 
 define double @f(i32 %a1, double %d) nounwind {
 entry:
@@ -18,11 +22,13 @@ entry:
   ret double %add
 }
 
-; CHECK-LABEL: f3:
-; FP32: mfc1
-; FP32: mfc1
-; FP64-DAG: mfc1
-; FP64-DAG: mfhc1
+; ALL-LABEL: f3:
+
+; NO-MFHC1: mfc1
+; NO-MFHC1: mfc1
+
+; HAS-MFHC1-DAG: mfc1
+; HAS-MFHC1-DAG: mfhc1
 
 define void @f3(double %d, i32 %a1) nounwind {
 entry:
diff --git a/test/CodeGen/Mips/cache-intrinsic.ll b/test/CodeGen/Mips/cache-intrinsic.ll
new file mode 100644
index 0000000000000..2fa4115895965
--- /dev/null
+++ b/test/CodeGen/Mips/cache-intrinsic.ll
@@ -0,0 +1,26 @@
+; RUN: llc %s -o - | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
+target triple = "mips--linux-gnu"
+
+@buffer = global [32 x i8] c"This is a largely unused buffer\00", align 1
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+@.str1 = private unnamed_addr constant [25 x i8] c"Still, largely unused...\00", align 1
+
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
+  %call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8]* @.str1, i32 0, i32 0)) #3
+  call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
+  ret i32 0
+}
+
+; CHECK: __clear_cache
+
+declare i32 @printf(i8*, ...)
+
+declare i8* @strcpy(i8*, i8*)
+
+declare void @llvm.clear_cache(i8*, i8*)
diff --git a/test/CodeGen/Mips/call-optimization.ll b/test/CodeGen/Mips/call-optimization.ll
new file mode 100644
index 0000000000000..bfa09eaae3cb8
--- /dev/null
+++ b/test/CodeGen/Mips/call-optimization.ll
@@ -0,0 +1,91 @@
+; RUN: llc -march=mipsel -disable-mips-delay-filler < %s | \
+; RUN:  FileCheck %s -check-prefix=O32
+; RUN: llc -march=mipsel -mips-load-target-from-got=false \
+; RUN:  -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=O32-LOADTGT
+
+@gd1 = common global double 0.000000e+00, align 8
+@gd2 = common global double 0.000000e+00, align 8
+
+; O32-LABEL: caller3:
+; O32-DAG:   lw $25, %call16(callee3)
+; O32-DAG:   move $gp
+; O32:       jalr $25
+; O32-NOT:   move $gp
+; O32:       lw $25, %call16(callee3)
+; O32-NOT:   move $gp
+; O32:       jalr $25
+; O32-NOT:   move $gp
+; O32:       lw $25, %call16(callee3)
+; O32-NOT:   move $gp
+; O32:       jalr $25
+
+; O32-LOADTGT-LABEL: caller3:
+; O32-LOADTGT-DAG:   lw $25, %call16(callee3)
+; O32-LOADTGT-DAG:   move $gp
+; O32-LOADTGT:       jalr $25
+; O32-LOADTGT-NOT:   move $gp
+; O32-LOADTGT:       move $25
+; O32-LOADTGT-NOT:   move $gp
+; O32-LOADTGT:       jalr $25
+; O32-LOADTGT-NOT:   move $gp
+; O32-LOADTGT:       move $25
+; O32-LOADTGT-NOT:   move $gp
+; O32-LOADTGT:       jalr $25
+
+define void @caller3(i32 %n) {
+entry:
+  tail call void @callee3()
+  tail call void @callee3()
+  %tobool1 = icmp eq i32 %n, 0
+  br i1 %tobool1, label %while.end, label %while.body
+
+while.body:
+  %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+  %dec = add nsw i32 %n.addr.02, -1
+  tail call void @callee3()
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+declare void @callee3()
+
+; O32-LABEL: caller4:
+; O32-DAG:   lw $25, %call16(ceil)
+; O32-DAG:   move $gp
+; O32:       jalr $25
+; O32-NOT:   move $gp
+; O32:       lw $25, %call16(ceil)
+; O32-NOT:   move $gp
+; O32:       jalr $25
+; O32-NOT:   move $gp
+; O32:       lw $25, %call16(ceil)
+; O32-NOT:   move $gp
+; O32:       jalr $25
+
+; O32-LOADTGT-LABEL: caller4:
+; O32-LOADTGT-DAG:   lw $25, %call16(ceil)
+; O32-LOADTGT-DAG:   move $gp
+; O32-LOADTGT:       jalr $25
+; O32-LOADTGT-NOT:   move $gp
+; O32-LOADTGT:       move $25
+; O32-LOADTGT-NOT:   move $gp
+; O32-LOADTGT:       jalr $25
+; O32-LOADTGT-NOT:   move $gp
+; O32-LOADTGT:       move $25
+; O32-LOADTGT-NOT:   move $gp
+; O32-LOADTGT:       jalr $25
+
+define void @caller4(double %d) {
+entry:
+  %call = tail call double @ceil(double %d)
+  %call1 = tail call double @ceil(double %call)
+  store double %call1, double* @gd2, align 8
+  %call2 = tail call double @ceil(double %call1)
+  store double %call2, double* @gd1, align 8
+  ret void
+}
+
+declare double @ceil(double)
diff --git a/test/CodeGen/Mips/cconv/arguments-float.ll b/test/CodeGen/Mips/cconv/arguments-float.ll
new file mode 100644
index 0000000000000..e2119ec08028a
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-float.ll
@@ -0,0 +1,222 @@
+; RUN: llc -march=mips -relocation-model=static -soft-float < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
+; RUN: llc -march=mipsel -relocation-model=static -soft-float < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+
+; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+
+; Test the floating point arguments for all ABI's and byte orders as specified
+; by section 5 of MD00305 (MIPS ABIs Described).
+;
+; N32/N64 are identical in this area so their checks have been combined into
+; the 'NEW' prefix (the N stands for New).
+
+@bytes = global [11 x i8] zeroinitializer
+@dwords = global [11 x i64] zeroinitializer
+@floats = global [11 x float] zeroinitializer
+@doubles = global [11 x double] zeroinitializer
+
+define void @double_args(double %a, double %b, double %c, double %d, double %e,
+                         double %f, double %g, double %h, double %i) nounwind {
+entry:
+        %0 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+        store volatile double %a, double* %0
+        %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
+        store volatile double %b, double* %1
+        %2 = getelementptr [11 x double]* @doubles, i32 0, i32 3
+        store volatile double %c, double* %2
+        %3 = getelementptr [11 x double]* @doubles, i32 0, i32 4
+        store volatile double %d, double* %3
+        %4 = getelementptr [11 x double]* @doubles, i32 0, i32 5
+        store volatile double %e, double* %4
+        %5 = getelementptr [11 x double]* @doubles, i32 0, i32 6
+        store volatile double %f, double* %5
+        %6 = getelementptr [11 x double]* @doubles, i32 0, i32 7
+        store volatile double %g, double* %6
+        %7 = getelementptr [11 x double]* @doubles, i32 0, i32 8
+        store volatile double %h, double* %7
+        %8 = getelementptr [11 x double]* @doubles, i32 0, i32 9
+        store volatile double %i, double* %8
+        ret void
+}
+
+; ALL-LABEL: double_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument is floating point but soft-float is enabled so floating
+; point registers are not used.
+; O32-DAG:           sw $4, 8([[R2]])
+; O32-DAG:           sw $5, 12([[R2]])
+; NEW-DAG:           sd $4, 8([[R2]])
+
+; O32-DAG:           sw $6, 16([[R2]])
+; O32-DAG:           sw $7, 20([[R2]])
+; NEW-DAG:           sd $5, 16([[R2]])
+
+; O32 has run out of argument registers and starts using the stack
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 24($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 28($sp)
+; O32-DAG:           sw [[R3]], 24([[R2]])
+; O32-DAG:           sw [[R4]], 28([[R2]])
+; NEW-DAG:           sd $6, 24([[R2]])
+
+; O32-DAG:           lw [[R3:\$[0-9]+]], 32($sp)
+; O32-DAG:           lw [[R4:\$[0-9]+]], 36($sp)
+; O32-DAG:           sw [[R3]], 32([[R2]])
+; O32-DAG:           sw [[R4]], 36([[R2]])
+; NEW-DAG:           sd $7, 32([[R2]])
+
+; O32-DAG:           lw [[R3:\$[0-9]+]], 40($sp)
+; O32-DAG:           lw [[R4:\$[0-9]+]], 44($sp)
+; O32-DAG:           sw [[R3]], 40([[R2]])
+; O32-DAG:           sw [[R4]], 44([[R2]])
+; NEW-DAG:           sd $8, 40([[R2]])
+
+; O32-DAG:           lw [[R3:\$[0-9]+]], 48($sp)
+; O32-DAG:           lw [[R4:\$[0-9]+]], 52($sp)
+; O32-DAG:           sw [[R3]], 48([[R2]])
+; O32-DAG:           sw [[R4]], 52([[R2]])
+; NEW-DAG:           sd $9, 48([[R2]])
+
+; O32-DAG:           lw [[R3:\$[0-9]+]], 56($sp)
+; O32-DAG:           lw [[R4:\$[0-9]+]], 60($sp)
+; O32-DAG:           sw [[R3]], 56([[R2]])
+; O32-DAG:           sw [[R4]], 60([[R2]])
+; NEW-DAG:           sd $10, 56([[R2]])
+
+; N32/N64 have run out of registers and starts using the stack too
+; O32-DAG:           lw [[R3:\$[0-9]+]], 64($sp)
+; O32-DAG:           lw [[R4:\$[0-9]+]], 68($sp)
+; O32-DAG:           sw [[R3]], 64([[R2]])
+; O32-DAG:           sw [[R4]], 68([[R2]])
+; NEW-DAG:           ld [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG:           sd $11, 64([[R2]])
+
+define void @float_args(float %a, float %b, float %c, float %d, float %e,
+                        float %f, float %g, float %h, float %i, float %j)
+                       nounwind {
+entry:
+        %0 = getelementptr [11 x float]* @floats, i32 0, i32 1
+        store volatile float %a, float* %0
+        %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
+        store volatile float %b, float* %1
+        %2 = getelementptr [11 x float]* @floats, i32 0, i32 3
+        store volatile float %c, float* %2
+        %3 = getelementptr [11 x float]* @floats, i32 0, i32 4
+        store volatile float %d, float* %3
+        %4 = getelementptr [11 x float]* @floats, i32 0, i32 5
+        store volatile float %e, float* %4
+        %5 = getelementptr [11 x float]* @floats, i32 0, i32 6
+        store volatile float %f, float* %5
+        %6 = getelementptr [11 x float]* @floats, i32 0, i32 7
+        store volatile float %g, float* %6
+        %7 = getelementptr [11 x float]* @floats, i32 0, i32 8
+        store volatile float %h, float* %7
+        %8 = getelementptr [11 x float]* @floats, i32 0, i32 9
+        store volatile float %i, float* %8
+        %9 = getelementptr [11 x float]* @floats, i32 0, i32 10
+        store volatile float %j, float* %9
+        ret void
+}
+
+; ALL-LABEL: float_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(floats)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument isn't floating point so floating point registers are not
+; used.
+; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
+; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte
+; aligned and occupying one slot. We'll use GCC's definition.
+; ALL-DAG:           sw $4, 4([[R2]])
+; ALL-DAG:           sw $5, 8([[R2]])
+; ALL-DAG:           sw $6, 12([[R2]])
+; ALL-DAG:           sw $7, 16([[R2]])
+
+; O32 has run out of argument registers and starts using the stack
+; O32-DAG:           lw [[R3:\$[0-9]+]], 16($sp)
+; O32-DAG:           sw [[R3]], 20([[R2]])
+; NEW-DAG:           sw $8, 20([[R2]])
+
+; O32-DAG:           lw [[R3:\$[0-9]+]], 20($sp)
+; O32-DAG:           sw [[R3]], 24([[R2]])
+; NEW-DAG:           sw $9, 24([[R2]])
+
+; O32-DAG:           lw [[R3:\$[0-9]+]], 24($sp)
+; O32-DAG:           sw [[R3]], 28([[R2]])
+; NEW-DAG:           sw $10, 28([[R2]])
+
+; O32-DAG:           lw [[R3:\$[0-9]+]], 28($sp)
+; O32-DAG:           sw [[R3]], 32([[R2]])
+; NEW-DAG:           sw $11, 32([[R2]])
+
+; N32/N64 have run out of registers and start using the stack too
+; O32-DAG:           lw [[R3:\$[0-9]+]], 32($sp)
+; O32-DAG:           sw [[R3]], 36([[R2]])
+; NEW-DAG:           lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG:           sw [[R3]], 36([[R2]])
+
+define void @double_arg2(i8 %a, double %b) nounwind {
+entry:
+        %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+        store volatile i8 %a, i8* %0
+        %1 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+        store volatile double %b, double* %1
+        ret void
+}
+
+; ALL-LABEL: double_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG:           ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument isn't floating point so floating point registers are not
+; used.
+; The second slot is insufficiently aligned for double on O32 so it is skipped.
+; Also, double occupies two slots on O32 and only one for N32/N64.
+; ALL-DAG:           sb $4, 1([[R1]])
+; O32-DAG:           sw $6, 8([[R2]])
+; O32-DAG:           sw $7, 12([[R2]])
+; NEW-DAG:           sd $5, 8([[R2]])
+
+define void @float_arg2(i8 %a, float %b) nounwind {
+entry:
+        %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+        store volatile i8 %a, i8* %0
+        %1 = getelementptr [11 x float]* @floats, i32 0, i32 1
+        store volatile float %b, float* %1
+        ret void
+}
+
+; ALL-LABEL: float_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG:           ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(floats)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument isn't floating point so floating point registers are not
+; used.
+; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
+; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte
+; aligned and occupying one slot. We'll use GCC's definition.
+; ALL-DAG:           sb $4, 1([[R1]])
+; ALL-DAG:           sw $5, 4([[R2]])
diff --git a/test/CodeGen/Mips/cconv/arguments-fp128.ll b/test/CodeGen/Mips/cconv/arguments-fp128.ll
new file mode 100644
index 0000000000000..c8cd8fd11e50a
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-fp128.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+
+; Test the fp128 arguments for all ABI's and byte orders as specified
+; by section 2 of the MIPSpro N32 Handbook.
+;
+; O32 is not tested because long double is the same as double on O32.
+
+@ldoubles = global [11 x fp128] zeroinitializer
+
+define void @ldouble_args(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e) nounwind {
+entry:
+        %0 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 1
+        store volatile fp128 %a, fp128* %0
+        %1 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 2
+        store volatile fp128 %b, fp128* %1
+        %2 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 3
+        store volatile fp128 %c, fp128* %2
+        %3 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 4
+        store volatile fp128 %d, fp128* %3
+        %4 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 5
+        store volatile fp128 %e, fp128* %4
+        ret void
+}
+
+; ALL-LABEL: ldouble_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(ldoubles)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(ldoubles)(
+
+; The first four arguments are the same in N32/N64.
+; The first argument is floating point but soft-float is enabled so floating
+; point registers are not used.
+; ALL-DAG:           sd $4, 16([[R2]])
+; ALL-DAG:           sd $5, 24([[R2]])
+; ALL-DAG:           sd $6, 32([[R2]])
+; ALL-DAG:           sd $7, 40([[R2]])
+; ALL-DAG:           sd $8, 48([[R2]])
+; ALL-DAG:           sd $9, 56([[R2]])
+; ALL-DAG:           sd $10, 64([[R2]])
+; ALL-DAG:           sd $11, 72([[R2]])
+
+; N32/N64 have run out of registers and starts using the stack too
+; ALL-DAG:           ld [[R3:\$[0-9]+]], 0($sp)
+; ALL-DAG:           ld [[R4:\$[0-9]+]], 8($sp)
+; ALL-DAG:           sd [[R3]], 80([[R2]])
+; ALL-DAG:           sd [[R4]], 88([[R2]])
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
new file mode 100644
index 0000000000000..aadf7d18c17db
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
@@ -0,0 +1,157 @@
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
+
+; Test the effect of varargs on floating point types in the non-variable part
+; of the argument list as specified by section 2 of the MIPSpro N32 Handbook.
+;
+; N32/N64 are almost identical in this area so many of their checks have been
+; combined into the 'NEW' prefix (the N stands for New).
+;
+; On O32, varargs prevents all FPU argument register usage. This contradicts
+; the N32 handbook, but agrees with the SYSV ABI and GCC's behaviour.
+
+@floats = global [11 x float] zeroinitializer
+@doubles = global [11 x double] zeroinitializer
+
+define void @double_args(double %a, ...)
+                         nounwind {
+entry:
+        %0 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+        store volatile double %a, double* %0
+
+        %ap = alloca i8*
+        %ap2 = bitcast i8** %ap to i8*
+        call void @llvm.va_start(i8* %ap2)
+        %b = va_arg i8** %ap, double
+        %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
+        store volatile double %b, double* %1
+        ret void
+}
+
+; ALL-LABEL: double_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:         addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG:         ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; O32 forbids using floating point registers for the non-variable portion.
+; N32/N64 allow it.
+; O32BE-DAG:         mtc1 $5, [[FTMP1:\$f[0-9]*[02468]+]]
+; O32BE-DAG:         mtc1 $4, [[FTMP2:\$f[0-9]*[13579]+]]
+; O32LE-DAG:         mtc1 $4, [[FTMP1:\$f[0-9]*[02468]+]]
+; O32LE-DAG:         mtc1 $5, [[FTMP2:\$f[0-9]*[13579]+]]
+; O32-DAG:           sdc1 [[FTMP1]], 8([[R2]])
+; NEW-DAG:           sdc1 $f12, 8([[R2]])
+
+; The varargs portion is dumped to stack
+; O32-DAG:           sw $6, 16($sp)
+; O32-DAG:           sw $7, 20($sp)
+; NEW-DAG:           sd $5, 8($sp)
+; NEW-DAG:           sd $6, 16($sp)
+; NEW-DAG:           sd $7, 24($sp)
+; NEW-DAG:           sd $8, 32($sp)
+; NEW-DAG:           sd $9, 40($sp)
+; NEW-DAG:           sd $10, 48($sp)
+; NEW-DAG:           sd $11, 56($sp)
+
+; Get the varargs pointer
+; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and 8 bytes reserved
+; for arguments 1 and 2.
+; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
+; O32-DAG:           addiu [[VAPTR:\$[0-9]+]], $sp, 16
+; O32-DAG:           sw [[VAPTR]], 4($sp)
+; N32-DAG:           addiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N32-DAG:           sw [[VAPTR]], 4($sp)
+; N64-DAG:           daddiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N64-DAG:           sd [[VAPTR]], 0($sp)
+
+; Increment the pointer then get the varargs arg
+; LLVM will rebind the load to the stack pointer instead of the varargs pointer
+; during lowering. This is fine and doesn't change the behaviour.
+; O32-DAG:           addiu [[VAPTR]], [[VAPTR]], 8
+; O32-DAG:           sw [[VAPTR]], 4($sp)
+; N32-DAG:           addiu [[VAPTR]], [[VAPTR]], 8
+; N32-DAG:           sw [[VAPTR]], 4($sp)
+; N64-DAG:           daddiu [[VAPTR]], [[VAPTR]], 8
+; N64-DAG:           sd [[VAPTR]], 0($sp)
+; O32-DAG:           ldc1 [[FTMP1:\$f[0-9]+]], 16($sp)
+; NEW-DAG:           ldc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; ALL-DAG:           sdc1 [[FTMP1]], 16([[R2]])
+
+define void @float_args(float %a, ...) nounwind {
+entry:
+        %0 = getelementptr [11 x float]* @floats, i32 0, i32 1
+        store volatile float %a, float* %0
+
+        %ap = alloca i8*
+        %ap2 = bitcast i8** %ap to i8*
+        call void @llvm.va_start(i8* %ap2)
+        %b = va_arg i8** %ap, float
+        %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
+        store volatile float %b, float* %1
+        ret void
+}
+
+; ALL-LABEL: float_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:         addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG:         ld [[R2:\$[0-9]]], %got_disp(floats)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The non-variable portion should be unaffected.
+; O32-DAG:           sw $4, 4([[R2]])
+; NEW-DAG:           swc1 $f12, 4([[R2]])
+
+; The varargs portion is dumped to stack
+; O32-DAG:           sw $5, 12($sp)
+; O32-DAG:           sw $6, 16($sp)
+; O32-DAG:           sw $7, 20($sp)
+; NEW-DAG:           sd $5, 8($sp)
+; NEW-DAG:           sd $6, 16($sp)
+; NEW-DAG:           sd $7, 24($sp)
+; NEW-DAG:           sd $8, 32($sp)
+; NEW-DAG:           sd $9, 40($sp)
+; NEW-DAG:           sd $10, 48($sp)
+; NEW-DAG:           sd $11, 56($sp)
+
+; Get the varargs pointer
+; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and should have 8
+; bytes reserved for arguments 1 and 2 (the first float arg) but as discussed in
+; arguments-float.ll, GCC doesn't agree with MD00305 and treats floats as 4
+; bytes so we only have 12 bytes total.
+; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
+; O32-DAG:           addiu [[VAPTR:\$[0-9]+]], $sp, 12
+; O32-DAG:           sw [[VAPTR]], 4($sp)
+; N32-DAG:           addiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N32-DAG:           sw [[VAPTR]], 4($sp)
+; N64-DAG:           daddiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N64-DAG:           sd [[VAPTR]], 0($sp)
+
+; Increment the pointer then get the varargs arg
+; LLVM will rebind the load to the stack pointer instead of the varargs pointer
+; during lowering. This is fine and doesn't change the behaviour.
+; N32/N64 is using ori instead of addiu/daddiu but (although odd) this is fine
+; since the stack is always aligned.
+; O32-DAG:           addiu [[VAPTR]], [[VAPTR]], 4
+; O32-DAG:           sw [[VAPTR]], 4($sp)
+; N32-DAG:           ori [[VAPTR]], [[VAPTR]], 4
+; N32-DAG:           sw [[VAPTR]], 4($sp)
+; N64-DAG:           ori [[VAPTR]], [[VAPTR]], 4
+; N64-DAG:           sd [[VAPTR]], 0($sp)
+; O32-DAG:           lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
+; NEW-DAG:           lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; ALL-DAG:           swc1 [[FTMP1]], 8([[R2]])
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-float.ll b/test/CodeGen/Mips/cconv/arguments-hard-float.ll
new file mode 100644
index 0000000000000..9837f7ee5586b
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-hard-float.ll
@@ -0,0 +1,211 @@
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+
+; Test the floating point arguments for all ABI's and byte orders as specified
+; by section 5 of MD00305 (MIPS ABIs Described).
+;
+; N32/N64 are identical in this area so their checks have been combined into
+; the 'NEW' prefix (the N stands for New).
+
+@bytes = global [11 x i8] zeroinitializer
+@dwords = global [11 x i64] zeroinitializer
+@floats = global [11 x float] zeroinitializer
+@doubles = global [11 x double] zeroinitializer
+
+define void @double_args(double %a, double %b, double %c, double %d, double %e,
+                         double %f, double %g, double %h, double %i) nounwind {
+entry:
+        %0 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+        store volatile double %a, double* %0
+        %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
+        store volatile double %b, double* %1
+        %2 = getelementptr [11 x double]* @doubles, i32 0, i32 3
+        store volatile double %c, double* %2
+        %3 = getelementptr [11 x double]* @doubles, i32 0, i32 4
+        store volatile double %d, double* %3
+        %4 = getelementptr [11 x double]* @doubles, i32 0, i32 5
+        store volatile double %e, double* %4
+        %5 = getelementptr [11 x double]* @doubles, i32 0, i32 6
+        store volatile double %f, double* %5
+        %6 = getelementptr [11 x double]* @doubles, i32 0, i32 7
+        store volatile double %g, double* %6
+        %7 = getelementptr [11 x double]* @doubles, i32 0, i32 8
+        store volatile double %h, double* %7
+        %8 = getelementptr [11 x double]* @doubles, i32 0, i32 9
+        store volatile double %i, double* %8
+        ret void
+}
+
+; ALL-LABEL: double_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; The first argument is floating point so floating point registers are used.
+; The first argument is the same for O32/N32/N64 but the second argument differs
+; by register
+; ALL-DAG:           sdc1 $f12, 8([[R2]])
+; O32-DAG:           sdc1 $f14, 16([[R2]])
+; NEW-DAG:           sdc1 $f13, 16([[R2]])
+
+; O32 has run out of argument registers and starts using the stack
+; O32-DAG:           ldc1 [[F1:\$f[0-9]+]], 16($sp)
+; O32-DAG:           sdc1 [[F1]], 24([[R2]])
+; NEW-DAG:           sdc1 $f14, 24([[R2]])
+; O32-DAG:           ldc1 [[F1:\$f[0-9]+]], 24($sp)
+; O32-DAG:           sdc1 [[F1]], 32([[R2]])
+; NEW-DAG:           sdc1 $f15, 32([[R2]])
+; O32-DAG:           ldc1 [[F1:\$f[0-9]+]], 32($sp)
+; O32-DAG:           sdc1 [[F1]], 40([[R2]])
+; NEW-DAG:           sdc1 $f16, 40([[R2]])
+; O32-DAG:           ldc1 [[F1:\$f[0-9]+]], 40($sp)
+; O32-DAG:           sdc1 [[F1]], 48([[R2]])
+; NEW-DAG:           sdc1 $f17, 48([[R2]])
+; O32-DAG:           ldc1 [[F1:\$f[0-9]+]], 48($sp)
+; O32-DAG:           sdc1 [[F1]], 56([[R2]])
+; NEW-DAG:           sdc1 $f18, 56([[R2]])
+; O32-DAG:           ldc1 [[F1:\$f[0-9]+]], 56($sp)
+; O32-DAG:           sdc1 [[F1]], 64([[R2]])
+; NEW-DAG:           sdc1 $f19, 64([[R2]])
+
+; N32/N64 have run out of registers and start using the stack too
+; O32-DAG:           ldc1 [[F1:\$f[0-9]+]], 64($sp)
+; O32-DAG:           sdc1 [[F1]], 72([[R2]])
+; NEW-DAG:           ldc1 [[F1:\$f[0-9]+]], 0($sp)
+; NEW-DAG:           sdc1 [[F1]], 72([[R2]])
+
+define void @float_args(float %a, float %b, float %c, float %d, float %e,
+                        float %f, float %g, float %h, float %i) nounwind {
+entry:
+        %0 = getelementptr [11 x float]* @floats, i32 0, i32 1
+        store volatile float %a, float* %0
+        %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
+        store volatile float %b, float* %1
+        %2 = getelementptr [11 x float]* @floats, i32 0, i32 3
+        store volatile float %c, float* %2
+        %3 = getelementptr [11 x float]* @floats, i32 0, i32 4
+        store volatile float %d, float* %3
+        %4 = getelementptr [11 x float]* @floats, i32 0, i32 5
+        store volatile float %e, float* %4
+        %5 = getelementptr [11 x float]* @floats, i32 0, i32 6
+        store volatile float %f, float* %5
+        %6 = getelementptr [11 x float]* @floats, i32 0, i32 7
+        store volatile float %g, float* %6
+        %7 = getelementptr [11 x float]* @floats, i32 0, i32 8
+        store volatile float %h, float* %7
+        %8 = getelementptr [11 x float]* @floats, i32 0, i32 9
+        store volatile float %i, float* %8
+        ret void
+}
+
+; ALL-LABEL: float_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG:           ld [[R1:\$[0-9]]], %got_disp(floats)(
+
+; The first argument is floating point so floating point registers are used.
+; The first argument is the same for O32/N32/N64 but the second argument differs
+; by register
+; ALL-DAG:           swc1 $f12, 4([[R1]])
+; O32-DAG:           swc1 $f14, 8([[R1]])
+; NEW-DAG:           swc1 $f13, 8([[R1]])
+
+; O32 has run out of argument registers and (in theory) starts using the stack
+; I've yet to find a reference in the documentation about this but GCC uses up
+; the remaining two argument slots in the GPR's first. We'll do the same for
+; compatibility.
+; O32-DAG:           sw $6, 12([[R1]])
+; NEW-DAG:           swc1 $f14, 12([[R1]])
+; O32-DAG:           sw $7, 16([[R1]])
+; NEW-DAG:           swc1 $f15, 16([[R1]])
+
+; O32 is definitely out of registers now and switches to the stack.
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 16($sp)
+; O32-DAG:           swc1 [[F1]], 20([[R1]])
+; NEW-DAG:           swc1 $f16, 20([[R1]])
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 20($sp)
+; O32-DAG:           swc1 [[F1]], 24([[R1]])
+; NEW-DAG:           swc1 $f17, 24([[R1]])
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 24($sp)
+; O32-DAG:           swc1 [[F1]], 28([[R1]])
+; NEW-DAG:           swc1 $f18, 28([[R1]])
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 28($sp)
+; O32-DAG:           swc1 [[F1]], 32([[R1]])
+; NEW-DAG:           swc1 $f19, 32([[R1]])
+
+; N32/N64 have run out of registers and start using the stack too
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 32($sp)
+; O32-DAG:           swc1 [[F1]], 36([[R1]])
+; NEW-DAG:           lwc1 [[F1:\$f[0-9]+]], 0($sp)
+; NEW-DAG:           swc1 [[F1]], 36([[R1]])
+
+
+define void @double_arg2(i8 %a, double %b) nounwind {
+entry:
+        %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+        store volatile i8 %a, i8* %0
+        %1 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+        store volatile double %b, double* %1
+        ret void
+}
+
+; ALL-LABEL: double_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG:           ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; The first argument is the same in O32/N32/N64.
+; ALL-DAG:           sb $4, 1([[R1]])
+
+; The first argument isn't floating point so floating point registers are not
+; used in O32, but N32/N64 will still use them.
+; The second slot is insufficiently aligned for double on O32 so it is skipped.
+; Also, double occupies two slots on O32 and only one for N32/N64.
+; O32LE-DAG:           mtc1 $6, [[F1:\$f[0-9]*[02468]+]]
+; O32LE-DAG:           mtc1 $7, [[F2:\$f[0-9]*[13579]+]]
+; O32BE-DAG:           mtc1 $6, [[F2:\$f[0-9]*[13579]+]]
+; O32BE-DAG:           mtc1 $7, [[F1:\$f[0-9]*[02468]+]]
+; O32-DAG:           sdc1 [[F1]], 8([[R2]])
+; NEW-DAG:           sdc1 $f13, 8([[R2]])
+
+define void @float_arg2(i8 %a, float %b) nounwind {
+entry:
+        %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+        store volatile i8 %a, i8* %0
+        %1 = getelementptr [11 x float]* @floats, i32 0, i32 1
+        store volatile float %b, float* %1
+        ret void
+}
+
+; ALL-LABEL: float_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG:           ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(floats)(
+
+; The first argument is the same in O32/N32/N64.
+; ALL-DAG:           sb $4, 1([[R1]])
+
+; The first argument isn't floating point so floating point registers are not
+; used in O32, but N32/N64 will still use them.
+; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
+; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte
+; aligned and occupying one slot. We'll use GCC's definition.
+; O32-DAG:           sw $5, 4([[R2]])
+; NEW-DAG:           swc1 $f13, 4([[R2]])
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll b/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll
new file mode 100644
index 0000000000000..5e3f403495f5b
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+
+; Test the fp128 arguments for all ABI's and byte orders as specified
+; by section 2 of the MIPSpro N32 Handbook.
+;
+; O32 is not tested because long double is the same as double on O32.
+
+@ldoubles = global [11 x fp128] zeroinitializer
+
+define void @ldouble_args(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e) nounwind {
+entry:
+        %0 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 1
+        store volatile fp128 %a, fp128* %0
+        %1 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 2
+        store volatile fp128 %b, fp128* %1
+        %2 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 3
+        store volatile fp128 %c, fp128* %2
+        %3 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 4
+        store volatile fp128 %d, fp128* %3
+        %4 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 5
+        store volatile fp128 %e, fp128* %4
+        ret void
+}
+
+; ALL-LABEL: ldouble_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(ldoubles)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(ldoubles)(
+
+; The first four arguments are the same in N32/N64.
+; ALL-DAG:           sdc1 $f12, 16([[R2]])
+; ALL-DAG:           sdc1 $f13, 24([[R2]])
+; ALL-DAG:           sdc1 $f14, 32([[R2]])
+; ALL-DAG:           sdc1 $f15, 40([[R2]])
+; ALL-DAG:           sdc1 $f16, 48([[R2]])
+; ALL-DAG:           sdc1 $f17, 56([[R2]])
+; ALL-DAG:           sdc1 $f18, 64([[R2]])
+; ALL-DAG:           sdc1 $f19, 72([[R2]])
+
+; N32/N64 have run out of registers and starts using the stack too
+; ALL-DAG:           ld [[R3:\$[0-9]+]], 0($sp)
+; ALL-DAG:           ld [[R4:\$[0-9]+]], 8($sp)
+; ALL-DAG:           sd [[R3]], 80([[R2]])
+; ALL-DAG:           sd [[R4]], 88([[R2]])
diff --git a/test/CodeGen/Mips/cconv/arguments.ll b/test/CodeGen/Mips/cconv/arguments.ll
new file mode 100644
index 0000000000000..8fe29f3c8ce7e
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments.ll
@@ -0,0 +1,170 @@
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+
+; Test the integer arguments for all ABI's and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+;
+; N32/N64 are identical in this area so their checks have been combined into
+; the 'NEW' prefix (the N stands for New).
+;
+; Varargs are covered in arguments-hard-float-varargs.ll.
+
+@bytes = global [11 x i8] zeroinitializer
+@dwords = global [11 x i64] zeroinitializer
+@floats = global [11 x float] zeroinitializer
+@doubles = global [11 x double] zeroinitializer
+
+define void @align_to_arg_slots(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g,
+                                i8 %h, i8 %i, i8 %j) nounwind {
+entry:
+        %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+        store volatile i8 %a, i8* %0
+        %1 = getelementptr [11 x i8]* @bytes, i32 0, i32 2
+        store volatile i8 %b, i8* %1
+        %2 = getelementptr [11 x i8]* @bytes, i32 0, i32 3
+        store volatile i8 %c, i8* %2
+        %3 = getelementptr [11 x i8]* @bytes, i32 0, i32 4
+        store volatile i8 %d, i8* %3
+        %4 = getelementptr [11 x i8]* @bytes, i32 0, i32 5
+        store volatile i8 %e, i8* %4
+        %5 = getelementptr [11 x i8]* @bytes, i32 0, i32 6
+        store volatile i8 %f, i8* %5
+        %6 = getelementptr [11 x i8]* @bytes, i32 0, i32 7
+        store volatile i8 %g, i8* %6
+        %7 = getelementptr [11 x i8]* @bytes, i32 0, i32 8
+        store volatile i8 %h, i8* %7
+        %8 = getelementptr [11 x i8]* @bytes, i32 0, i32 9
+        store volatile i8 %i, i8* %8
+        %9 = getelementptr [11 x i8]* @bytes, i32 0, i32 10
+        store volatile i8 %j, i8* %9
+        ret void
+}
+
+; ALL-LABEL: align_to_arg_slots:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG:           ld [[R1:\$[0-9]]], %got_disp(bytes)(
+
+; The first four arguments are the same in O32/N32/N64
+; ALL-DAG:           sb $4, 1([[R1]])
+; ALL-DAG:           sb $5, 2([[R1]])
+; ALL-DAG:           sb $6, 3([[R1]])
+; ALL-DAG:           sb $7, 4([[R1]])
+
+; N32/N64 get an extra four arguments in registers
+; O32 starts loading from the stack. The addresses start at 16 because space is
+; always reserved for the first four arguments.
+; O32-DAG:           lw [[R3:\$[0-9]+]], 16($sp)
+; O32-DAG:           sb [[R3]], 5([[R1]])
+; NEW-DAG:           sb $8, 5([[R1]])
+; O32-DAG:           lw [[R3:\$[0-9]+]], 20($sp)
+; O32-DAG:           sb [[R3]], 6([[R1]])
+; NEW-DAG:           sb $9, 6([[R1]])
+; O32-DAG:           lw [[R3:\$[0-9]+]], 24($sp)
+; O32-DAG:           sb [[R3]], 7([[R1]])
+; NEW-DAG:           sb $10, 7([[R1]])
+; O32-DAG:           lw [[R3:\$[0-9]+]], 28($sp)
+; O32-DAG:           sb [[R3]], 8([[R1]])
+; NEW-DAG:           sb $11, 8([[R1]])
+
+; O32/N32/N64 are accessing the stack at this point.
+; Unlike O32, N32/N64 do not reserve space for the arguments.
+; increase by 4 for O32 and 8 for N32/N64.
+; O32-DAG:           lw [[R3:\$[0-9]+]], 32($sp)
+; O32-DAG:           sb [[R3]], 9([[R1]])
+; NEW-DAG:           lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG:           sb [[R3]], 9([[R1]])
+; O32-DAG:           lw [[R3:\$[0-9]+]], 36($sp)
+; O32-DAG:           sb [[R3]], 10([[R1]])
+; NEW-DAG:           lw [[R3:\$[0-9]+]], 8($sp)
+; NEW-DAG:           sb [[R3]], 10([[R1]])
+
+define void @slot_skipping(i8 %a, i64 %b, i8 %c, i8 %d,
+                           i8 %e, i8 %f, i8 %g, i64 %i, i8 %j) nounwind {
+entry:
+        %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+        store volatile i8 %a, i8* %0
+        %1 = getelementptr [11 x i64]* @dwords, i32 0, i32 1
+        store volatile i64 %b, i64* %1
+        %2 = getelementptr [11 x i8]* @bytes, i32 0, i32 2
+        store volatile i8 %c, i8* %2
+        %3 = getelementptr [11 x i8]* @bytes, i32 0, i32 3
+        store volatile i8 %d, i8* %3
+        %4 = getelementptr [11 x i8]* @bytes, i32 0, i32 4
+        store volatile i8 %e, i8* %4
+        %5 = getelementptr [11 x i8]* @bytes, i32 0, i32 5
+        store volatile i8 %f, i8* %5
+        %6 = getelementptr [11 x i8]* @bytes, i32 0, i32 6
+        store volatile i8 %g, i8* %6
+        %7 = getelementptr [11 x i64]* @dwords, i32 0, i32 2
+        store volatile i64 %i, i64* %7
+        %8 = getelementptr [11 x i8]* @bytes, i32 0, i32 7
+        store volatile i8 %j, i8* %8
+        ret void
+}
+
+; ALL-LABEL: slot_skipping:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:           addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG:           ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(dwords)(
+
+; The first argument is the same in O32/N32/N64.
+; ALL-DAG:           sb $4, 1([[R1]])
+
+; The second slot is insufficiently aligned for i64 on O32 so it is skipped.
+; Also, i64 occupies two slots on O32 and only one for N32/N64.
+; O32-DAG:           sw $6, 8([[R2]])
+; O32-DAG:           sw $7, 12([[R2]])
+; NEW-DAG:           sd $5, 8([[R2]])
+
+; N32/N64 get an extra four arguments in registers and still have two left from
+; the first four.
+; O32 starts loading from the stack. The addresses start at 16 because space is
+; always reserved for the first four arguments.
+; It's not clear why O32 uses lbu for this argument, but it's not wrong so we'll
+; accept it for now. The only IR difference is that this argument has
+; anyext from i8 and align 8 on it.
+; O32LE-DAG:           lbu [[R3:\$[0-9]+]], 16($sp)
+; O32BE-DAG:           lbu [[R3:\$[0-9]+]], 19($sp)
+; O32-DAG:           sb [[R3]], 2([[R1]])
+; NEW-DAG:           sb $6, 2([[R1]])
+; O32-DAG:           lw [[R3:\$[0-9]+]], 20($sp)
+; O32-DAG:           sb [[R3]], 3([[R1]])
+; NEW-DAG:           sb $7, 3([[R1]])
+; O32-DAG:           lw [[R3:\$[0-9]+]], 24($sp)
+; O32-DAG:           sb [[R3]], 4([[R1]])
+; NEW-DAG:           sb $8, 4([[R1]])
+; O32-DAG:           lw [[R3:\$[0-9]+]], 28($sp)
+; O32-DAG:           sb [[R3]], 5([[R1]])
+; NEW-DAG:           sb $9, 5([[R1]])
+
+; O32-DAG:           lw [[R3:\$[0-9]+]], 32($sp)
+; O32-DAG:           sb [[R3]], 6([[R1]])
+; NEW-DAG:           sb $10, 6([[R1]])
+
+; O32-DAG:           lw [[R3:\$[0-9]+]], 40($sp)
+; O32-DAG:           sw [[R3]], 16([[R2]])
+; O32-DAG:           lw [[R3:\$[0-9]+]], 44($sp)
+; O32-DAG:           sw [[R3]], 20([[R2]])
+; NEW-DAG:           sd $11, 16([[R2]])
+
+; O32/N32/N64 are accessing the stack at this point.
+; Unlike O32, N32/N64 do not reserve space for the arguments.
+; increase by 4 for O32 and 8 for N32/N64.
+; O32-DAG:           lw [[R3:\$[0-9]+]], 48($sp)
+; O32-DAG:           sb [[R3]], 7([[R1]])
+; NEW-DAG:           lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG:           sb [[R3]], 7([[R1]])
diff --git a/test/CodeGen/Mips/cconv/callee-saved-float.ll b/test/CodeGen/Mips/cconv/callee-saved-float.ll
new file mode 100644
index 0000000000000..de4d9171aec44
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/callee-saved-float.ll
@@ -0,0 +1,111 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=O32-INV %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=O32-INV %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N32-INV %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N32-INV %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N64-INV %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N64-INV %s
+
+; Test the the callee-saved registers are callee-saved as specified by section
+; 2 of the MIPSpro N32 Handbook and section 3 of the SYSV ABI spec.
+
+define void @fpu_clobber() nounwind {
+entry:
+        call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f13},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+        ret void
+}
+
+; ALL-LABEL: fpu_clobber:
+; ALL-INV-NOT:   sdc1 $f0,
+; ALL-INV-NOT:   sdc1 $f1,
+; ALL-INV-NOT:   sdc1 $f2,
+; ALL-INV-NOT:   sdc1 $f3,
+; ALL-INV-NOT:   sdc1 $f4,
+; ALL-INV-NOT:   sdc1 $f5,
+; ALL-INV-NOT:   sdc1 $f6,
+; ALL-INV-NOT:   sdc1 $f7,
+; ALL-INV-NOT:   sdc1 $f8,
+; ALL-INV-NOT:   sdc1 $f9,
+; ALL-INV-NOT:   sdc1 $f10,
+; ALL-INV-NOT:   sdc1 $f11,
+; ALL-INV-NOT:   sdc1 $f12,
+; ALL-INV-NOT:   sdc1 $f13,
+; ALL-INV-NOT:   sdc1 $f14,
+; ALL-INV-NOT:   sdc1 $f15,
+; ALL-INV-NOT:   sdc1 $f16,
+; ALL-INV-NOT:   sdc1 $f17,
+; ALL-INV-NOT:   sdc1 $f18,
+; ALL-INV-NOT:   sdc1 $f19,
+; ALL-INV-NOT:   sdc1 $f21,
+; ALL-INV-NOT:   sdc1 $f23,
+
+; O32:           addiu $sp, $sp, -48
+; O32-DAG:       sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; O32-DAG:       sdc1 [[F22:\$f22]], [[OFF22:[0-9]+]]($sp)
+; O32-DAG:       sdc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; O32-DAG:       sdc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; O32-DAG:       sdc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; O32-DAG:       sdc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; O32-DAG:       ldc1 [[F20]], [[OFF20]]($sp)
+; O32-DAG:       ldc1 [[F22]], [[OFF22]]($sp)
+; O32-DAG:       ldc1 [[F24]], [[OFF24]]($sp)
+; O32-INV-NOT:   sdc1 $f25,
+; O32-DAG:       ldc1 [[F26]], [[OFF26]]($sp)
+; O32-INV-NOT:   sdc1 $f27,
+; O32-DAG:       ldc1 [[F28]], [[OFF28]]($sp)
+; O32-INV-NOT:   sdc1 $f29,
+; O32-DAG:       ldc1 [[F30]], [[OFF30]]($sp)
+; O32-INV-NOT:   sdc1 $f31,
+; O32:           addiu $sp, $sp, 48
+
+; N32:           addiu $sp, $sp, -48
+; N32-DAG:       sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; N32-DAG:       sdc1 [[F22:\$f22]], [[OFF22:[0-9]+]]($sp)
+; N32-DAG:       sdc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; N32-DAG:       sdc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; N32-DAG:       sdc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; N32-DAG:       sdc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; N32-DAG:       ldc1 [[F20]], [[OFF20]]($sp)
+; N32-DAG:       ldc1 [[F22]], [[OFF22]]($sp)
+; N32-DAG:       ldc1 [[F24]], [[OFF24]]($sp)
+; N32-INV-NOT:   sdc1 $f25,
+; N32-DAG:       ldc1 [[F26]], [[OFF26]]($sp)
+; N32-INV-NOT:   sdc1 $f27,
+; N32-DAG:       ldc1 [[F28]], [[OFF28]]($sp)
+; N32-INV-NOT:   sdc1 $f29,
+; N32-DAG:       ldc1 [[F30]], [[OFF30]]($sp)
+; N32-INV-NOT:   sdc1 $f31,
+; N32:           addiu $sp, $sp, 48
+
+; N64:           addiu $sp, $sp, -64
+; N64-INV-NOT:   sdc1 $f20,
+; N64-INV-NOT:   sdc1 $f22,
+; N64-DAG:       sdc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; N64-DAG:       sdc1 [[F25:\$f25]], [[OFF25:[0-9]+]]($sp)
+; N64-DAG:       sdc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; N64-DAG:       sdc1 [[F27:\$f27]], [[OFF27:[0-9]+]]($sp)
+; N64-DAG:       sdc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; N64-DAG:       sdc1 [[F29:\$f29]], [[OFF29:[0-9]+]]($sp)
+; N64-DAG:       sdc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; N64-DAG:       sdc1 [[F31:\$f31]], [[OFF31:[0-9]+]]($sp)
+; N64-DAG:       ldc1 [[F24]], [[OFF24]]($sp)
+; N64-DAG:       ldc1 [[F25]], [[OFF25]]($sp)
+; N64-DAG:       ldc1 [[F26]], [[OFF26]]($sp)
+; N64-DAG:       ldc1 [[F27]], [[OFF27]]($sp)
+; N64-DAG:       ldc1 [[F28]], [[OFF28]]($sp)
+; N64-DAG:       ldc1 [[F29]], [[OFF29]]($sp)
+; N64-DAG:       ldc1 [[F30]], [[OFF30]]($sp)
+; N64-DAG:       ldc1 [[F31]], [[OFF31]]($sp)
+; N64:           addiu $sp, $sp, 64
diff --git a/test/CodeGen/Mips/cconv/callee-saved-fpxx.ll b/test/CodeGen/Mips/cconv/callee-saved-fpxx.ll
new file mode 100644
index 0000000000000..4b28b99620757
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/callee-saved-fpxx.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s
+; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s
+; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV %s
+; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV %s
+
+; RUN-TODO: llc -march=mips64 -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s
+; RUN-TODO: llc -march=mips64el -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s
+; RUN-TODO: llc -march=mips64 -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV --check-prefix=O32-FPXX-INV %s
+; RUN-TODO: llc -march=mips64el -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV --check-prefix=O32-FPXX-INV %s
+
+define void @fpu_clobber() nounwind {
+entry:
+    call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f13},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+    ret void
+}
+
+; O32-FPXX-LABEL: fpu_clobber:
+; O32-FPXX-INV-NOT:   sdc1 $f0,
+; O32-FPXX-INV-NOT:   sdc1 $f1,
+; O32-FPXX-INV-NOT:   sdc1 $f2,
+; O32-FPXX-INV-NOT:   sdc1 $f3,
+; O32-FPXX-INV-NOT:   sdc1 $f4,
+; O32-FPXX-INV-NOT:   sdc1 $f5,
+; O32-FPXX-INV-NOT:   sdc1 $f6,
+; O32-FPXX-INV-NOT:   sdc1 $f7,
+; O32-FPXX-INV-NOT:   sdc1 $f8,
+; O32-FPXX-INV-NOT:   sdc1 $f9,
+; O32-FPXX-INV-NOT:   sdc1 $f10,
+; O32-FPXX-INV-NOT:   sdc1 $f11,
+; O32-FPXX-INV-NOT:   sdc1 $f12,
+; O32-FPXX-INV-NOT:   sdc1 $f13,
+; O32-FPXX-INV-NOT:   sdc1 $f14,
+; O32-FPXX-INV-NOT:   sdc1 $f15,
+; O32-FPXX-INV-NOT:   sdc1 $f16,
+; O32-FPXX-INV-NOT:   sdc1 $f17,
+; O32-FPXX-INV-NOT:   sdc1 $f18,
+; O32-FPXX-INV-NOT:   sdc1 $f19,
+; O32-FPXX-INV-NOT:   sdc1 $f21,
+; O32-FPXX-INV-NOT:   sdc1 $f23,
+; O32-FPXX-INV-NOT:   sdc1 $f25,
+; O32-FPXX-INV-NOT:   sdc1 $f27,
+; O32-FPXX-INV-NOT:   sdc1 $f29,
+; O32-FPXX-INV-NOT:   sdc1 $f31,
+
+; O32-FPXX:           addiu $sp, $sp, -48
+; O32-FPXX-DAG:       sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F22:\$f22]], [[OFF22:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F20]], [[OFF20]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F22]], [[OFF22]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F24]], [[OFF24]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F26]], [[OFF26]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F28]], [[OFF28]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F30]], [[OFF30]]($sp)
+; O32-FPXX:           addiu $sp, $sp, 48
diff --git a/test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll b/test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll
new file mode 100644
index 0000000000000..489879e98ad38
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=mips -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=O32-FP64-INV %s
+; RUN: llc -march=mipsel -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=O32-FP64-INV %s
+
+; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s
+; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s
+
+; RUN-TODO: llc -march=mips64 -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s
+; RUN-TODO: llc -march=mips64el -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s
+
+define void @fpu_clobber() nounwind {
+entry:
+    call void asm "# Clobber", "~{$f21}"()
+    ret void
+}
+
+; O32-FPXX-LABEL: fpu_clobber:
+
+; O32-FPXX:           addiu $sp, $sp, -8
+
+; O32-FP64-INV-NOT:   sdc1 $f20,
+; O32-FPXX-DAG:       sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F20]], [[OFF20]]($sp)
+
+; O32-FPXX:           addiu $sp, $sp, 8
diff --git a/test/CodeGen/Mips/cconv/callee-saved.ll b/test/CodeGen/Mips/cconv/callee-saved.ll
new file mode 100644
index 0000000000000..293e99f0c8e69
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/callee-saved.ll
@@ -0,0 +1,167 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32-INV %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32-INV %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s
+
+; Test the the callee-saved registers are callee-saved as specified by section
+; 2 of the MIPSpro N32 Handbook and section 3 of the SYSV ABI spec.
+
+define void @gpr_clobber() nounwind {
+entry:
+        ; Clobbering the stack pointer is a bad idea so we'll skip that one
+        call void asm "# Clobber", "~{$0},~{$1},~{$2},~{$3},~{$4},~{$5},~{$6},~{$7},~{$8},~{$9},~{$10},~{$11},~{$12},~{$13},~{$14},~{$15},~{$16},~{$17},~{$18},~{$19},~{$20},~{$21},~{$22},~{$23},~{$24},~{$25},~{$26},~{$27},~{$28},~{$30},~{$31}"()
+        ret void
+}
+
+; ALL-LABEL: gpr_clobber:
+; O32:           addiu $sp, $sp, -40
+; O32-INV-NOT:   sw $0,
+; O32-INV-NOT:   sw $1,
+; O32-INV-NOT:   sw $2,
+; O32-INV-NOT:   sw $3,
+; O32-INV-NOT:   sw $4,
+; O32-INV-NOT:   sw $5,
+; O32-INV-NOT:   sw $6,
+; O32-INV-NOT:   sw $7,
+; O32-INV-NOT:   sw $8,
+; O32-INV-NOT:   sw $9,
+; O32-INV-NOT:   sw $10,
+; O32-INV-NOT:   sw $11,
+; O32-INV-NOT:   sw $12,
+; O32-INV-NOT:   sw $13,
+; O32-INV-NOT:   sw $14,
+; O32-INV-NOT:   sw $15,
+; O32-DAG:       sw [[G16:\$16]], [[OFF16:[0-9]+]]($sp)
+; O32-DAG:       sw [[G17:\$17]], [[OFF17:[0-9]+]]($sp)
+; O32-DAG:       sw [[G18:\$18]], [[OFF18:[0-9]+]]($sp)
+; O32-DAG:       sw [[G19:\$19]], [[OFF19:[0-9]+]]($sp)
+; O32-DAG:       sw [[G20:\$20]], [[OFF20:[0-9]+]]($sp)
+; O32-DAG:       sw [[G21:\$21]], [[OFF21:[0-9]+]]($sp)
+; O32-DAG:       sw [[G22:\$22]], [[OFF22:[0-9]+]]($sp)
+; O32-DAG:       sw [[G23:\$23]], [[OFF23:[0-9]+]]($sp)
+; O32-INV-NOT:   sw $24,
+; O32-INV-NOT:   sw $25,
+; O32-INV-NOT:   sw $26,
+; O32-INV-NOT:   sw $27,
+; O32-INV-NOT:   sw $28,
+; O32-INV-NOT:   sw $29,
+; O32-DAG:       sw [[G30:\$fp]], [[OFF30:[0-9]+]]($sp)
+; O32-DAG:       sw [[G31:\$fp]], [[OFF31:[0-9]+]]($sp)
+; O32-DAG:       lw [[G16]], [[OFF16]]($sp)
+; O32-DAG:       lw [[G17]], [[OFF17]]($sp)
+; O32-DAG:       lw [[G18]], [[OFF18]]($sp)
+; O32-DAG:       lw [[G19]], [[OFF19]]($sp)
+; O32-DAG:       lw [[G20]], [[OFF20]]($sp)
+; O32-DAG:       lw [[G21]], [[OFF21]]($sp)
+; O32-DAG:       lw [[G22]], [[OFF22]]($sp)
+; O32-DAG:       lw [[G23]], [[OFF23]]($sp)
+; O32-DAG:       lw [[G30]], [[OFF30]]($sp)
+; O32-DAG:       lw [[G31]], [[OFF31]]($sp)
+; O32:           addiu $sp, $sp, 40
+
+; N32:           addiu $sp, $sp, -96
+; N32-INV-NOT:   sd $0,
+; N32-INV-NOT:   sd $1,
+; N32-INV-NOT:   sd $2,
+; N32-INV-NOT:   sd $3,
+; N32-INV-NOT:   sd $4,
+; N32-INV-NOT:   sd $5,
+; N32-INV-NOT:   sd $6,
+; N32-INV-NOT:   sd $7,
+; N32-INV-NOT:   sd $8,
+; N32-INV-NOT:   sd $9,
+; N32-INV-NOT:   sd $10,
+; N32-INV-NOT:   sd $11,
+; N32-INV-NOT:   sd $12,
+; N32-INV-NOT:   sd $13,
+; N32-INV-NOT:   sd $14,
+; N32-INV-NOT:   sd $15,
+; N32-DAG:       sd [[G16:\$16]], [[OFF16:[0-9]+]]($sp)
+; N32-DAG:       sd [[G17:\$17]], [[OFF17:[0-9]+]]($sp)
+; N32-DAG:       sd [[G18:\$18]], [[OFF18:[0-9]+]]($sp)
+; N32-DAG:       sd [[G19:\$19]], [[OFF19:[0-9]+]]($sp)
+; N32-DAG:       sd [[G20:\$20]], [[OFF20:[0-9]+]]($sp)
+; N32-DAG:       sd [[G21:\$21]], [[OFF21:[0-9]+]]($sp)
+; N32-DAG:       sd [[G22:\$22]], [[OFF22:[0-9]+]]($sp)
+; N32-DAG:       sd [[G23:\$23]], [[OFF23:[0-9]+]]($sp)
+; N32-INV-NOT:   sd $24,
+; N32-INV-NOT:   sd $25,
+; N32-INV-NOT:   sd $26,
+; N32-INV-NOT:   sd $27,
+; N32-DAG:       sd [[G28:\$gp]], [[OFF28:[0-9]+]]($sp)
+; N32-INV-NOT:   sd $29,
+; N32-DAG:       sd [[G30:\$fp]], [[OFF30:[0-9]+]]($sp)
+; N32-DAG:       sd [[G31:\$fp]], [[OFF31:[0-9]+]]($sp)
+; N32-DAG:       ld [[G16]], [[OFF16]]($sp)
+; N32-DAG:       ld [[G17]], [[OFF17]]($sp)
+; N32-DAG:       ld [[G18]], [[OFF18]]($sp)
+; N32-DAG:       ld [[G19]], [[OFF19]]($sp)
+; N32-DAG:       ld [[G20]], [[OFF20]]($sp)
+; N32-DAG:       ld [[G21]], [[OFF21]]($sp)
+; N32-DAG:       ld [[G22]], [[OFF22]]($sp)
+; N32-DAG:       ld [[G23]], [[OFF23]]($sp)
+; N32-DAG:       ld [[G28]], [[OFF28]]($sp)
+; N32-DAG:       ld [[G30]], [[OFF30]]($sp)
+; N32-DAG:       ld [[G31]], [[OFF31]]($sp)
+; N32:           addiu $sp, $sp, 96
+
+; N64:           daddiu $sp, $sp, -96
+; N64-INV-NOT:   sd $0,
+; N64-INV-NOT:   sd $1,
+; N64-INV-NOT:   sd $2,
+; N64-INV-NOT:   sd $3,
+; N64-INV-NOT:   sd $4,
+; N64-INV-NOT:   sd $5,
+; N64-INV-NOT:   sd $6,
+; N64-INV-NOT:   sd $7,
+; N64-INV-NOT:   sd $8,
+; N64-INV-NOT:   sd $9,
+; N64-INV-NOT:   sd $10,
+; N64-INV-NOT:   sd $11,
+; N64-INV-NOT:   sd $12,
+; N64-INV-NOT:   sd $13,
+; N64-INV-NOT:   sd $14,
+; N64-INV-NOT:   sd $15,
+; N64-DAG:       sd [[G16:\$16]], [[OFF16:[0-9]+]]($sp)
+; N64-DAG:       sd [[G17:\$17]], [[OFF17:[0-9]+]]($sp)
+; N64-DAG:       sd [[G18:\$18]], [[OFF18:[0-9]+]]($sp)
+; N64-DAG:       sd [[G19:\$19]], [[OFF19:[0-9]+]]($sp)
+; N64-DAG:       sd [[G20:\$20]], [[OFF20:[0-9]+]]($sp)
+; N64-DAG:       sd [[G21:\$21]], [[OFF21:[0-9]+]]($sp)
+; N64-DAG:       sd [[G22:\$22]], [[OFF22:[0-9]+]]($sp)
+; N64-DAG:       sd [[G23:\$23]], [[OFF23:[0-9]+]]($sp)
+; N64-DAG:       sd [[G30:\$fp]], [[OFF30:[0-9]+]]($sp)
+; N64-DAG:       sd [[G31:\$fp]], [[OFF31:[0-9]+]]($sp)
+; N64-INV-NOT:   sd $24,
+; N64-INV-NOT:   sd $25,
+; N64-INV-NOT:   sd $26,
+; N64-INV-NOT:   sd $27,
+; N64-DAG:       sd [[G28:\$gp]], [[OFF28:[0-9]+]]($sp)
+; N64-INV-NOT:   sd $29,
+; N64-DAG:       ld [[G16]], [[OFF16]]($sp)
+; N64-DAG:       ld [[G17]], [[OFF17]]($sp)
+; N64-DAG:       ld [[G18]], [[OFF18]]($sp)
+; N64-DAG:       ld [[G19]], [[OFF19]]($sp)
+; N64-DAG:       ld [[G20]], [[OFF20]]($sp)
+; N64-DAG:       ld [[G21]], [[OFF21]]($sp)
+; N64-DAG:       ld [[G22]], [[OFF22]]($sp)
+; N64-DAG:       ld [[G23]], [[OFF23]]($sp)
+; N64-DAG:       ld [[G28]], [[OFF28]]($sp)
+; N64-DAG:       ld [[G30]], [[OFF30]]($sp)
+; N64-DAG:       ld [[G31]], [[OFF31]]($sp)
+; N64:           daddiu $sp, $sp, 96
diff --git a/test/CodeGen/Mips/cconv/memory-layout.ll b/test/CodeGen/Mips/cconv/memory-layout.ll
new file mode 100644
index 0000000000000..0c3cc9ecedfef
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/memory-layout.ll
@@ -0,0 +1,140 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the memory layout for all ABI's and byte orders as specified by section
+; 4 of MD00305 (MIPS ABIs Described).
+; Bitfields are not covered since they are not available as a type in LLVM IR.
+;
+; The assembly directives deal with endianness so we don't need to account for
+; that.
+
+; Deliberately request alignments that are too small for the target so we get
+; the minimum alignment instead of the preferred alignment.
+@byte = global i8 1, align 1
+@halfword = global i16 258, align 1
+@word = global i32 16909060, align 1
+@float = global float 1.0, align 1
+@dword = global i64 283686952306183, align 1
+@double = global double 1.0, align 1
+@pointer = global i8* @byte
+
+; ALL-NOT:       .align
+; ALL-LABEL: byte:
+; ALL:           .byte 1
+; ALL:           .size byte, 1
+
+; ALL:           .align 1
+; ALL-LABEL: halfword:
+; ALL:           .2byte 258
+; ALL:           .size halfword, 2
+
+; ALL:           .align 2
+; ALL-LABEL: word:
+; ALL:           .4byte 16909060
+; ALL:           .size word, 4
+
+; ALL:           .align 2
+; ALL-LABEL: float:
+; ALL:           .4byte 1065353216
+; ALL:           .size float, 4
+
+; ALL:           .align 3
+; ALL-LABEL: dword:
+; ALL:           .8byte 283686952306183
+; ALL:           .size dword, 8
+
+; ALL:           .align 3
+; ALL-LABEL: double:
+; ALL:           .8byte 4607182418800017408
+; ALL:           .size double, 8
+
+; O32:           .align 2
+; N32:           .align 2
+; N64:           .align 3
+; ALL-LABEL: pointer:
+; O32:           .4byte byte
+; O32:           .size pointer, 4
+; N32:           .4byte byte
+; N32:           .size pointer, 4
+; N64:           .8byte byte
+; N64:           .size pointer, 8
+
+@byte_array = global [2 x i8] [i8 1, i8 2], align 1
+@halfword_array = global [2 x i16] [i16 1, i16 2], align 1
+@word_array = global [2 x i32] [i32 1, i32 2], align 1
+@float_array = global [2 x float] [float 1.0, float 2.0], align 1
+@dword_array = global [2 x i64] [i64 1, i64 2], align 1
+@double_array = global [2 x double] [double 1.0, double 2.0], align 1
+@pointer_array = global [2 x i8*] [i8* @byte, i8* @byte]
+
+; ALL-NOT:       .align
+; ALL-LABEL: byte_array:
+; ALL:           .ascii "\001\002"
+; ALL:           .size byte_array, 2
+
+; ALL:           .align 1
+; ALL-LABEL: halfword_array:
+; ALL:           .2byte 1
+; ALL:           .2byte 2
+; ALL:           .size halfword_array, 4
+
+; ALL:           .align 2
+; ALL-LABEL: word_array:
+; ALL:           .4byte 1
+; ALL:           .4byte 2
+; ALL:           .size word_array, 8
+
+; ALL:           .align 2
+; ALL-LABEL: float_array:
+; ALL:           .4byte 1065353216
+; ALL:           .4byte 1073741824
+; ALL:           .size float_array, 8
+
+; ALL:           .align 3
+; ALL-LABEL: dword_array:
+; ALL:           .8byte 1
+; ALL:           .8byte 2
+; ALL:           .size dword_array, 16
+
+; ALL:           .align 3
+; ALL-LABEL: double_array:
+; ALL:           .8byte 4607182418800017408
+; ALL:           .8byte 4611686018427387904
+; ALL:           .size double_array, 16
+
+; O32:           .align 2
+; N32:           .align 2
+; N64:           .align 3
+; ALL-LABEL: pointer_array:
+; O32:           .4byte byte
+; O32:           .4byte byte
+; O32:           .size pointer_array, 8
+; N32:           .4byte byte
+; N32:           .4byte byte
+; N32:           .size pointer_array, 8
+; N64:           .8byte byte
+; N64:           .8byte byte
+; N64:           .size pointer_array, 16
+
+%mixed = type { i8, double, i16 }
+@mixed = global %mixed { i8 1, double 1.0, i16 515 }, align 1
+
+; ALL:           .align 3
+; ALL-LABEL: mixed:
+; ALL:           .byte 1
+; ALL:           .space 7
+; ALL:           .8byte 4607182418800017408
+; ALL:           .2byte 515
+; ALL:           .space 6
+; ALL:           .size mixed, 24
+
+; Bitfields are not available in LLVM IR so we can't test them here.
diff --git a/test/CodeGen/Mips/cconv/reserved-space.ll b/test/CodeGen/Mips/cconv/reserved-space.ll
new file mode 100644
index 0000000000000..b36f89ecc1152
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/reserved-space.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test that O32 correctly reserved space for the four arguments, even when
+; there aren't any as per section 5 of MD00305 (MIPS ABIs Described).
+
+declare void @foo() nounwind;
+
+define void @reserved_space() nounwind {
+entry:
+        tail call void @foo()
+        ret void
+}
+
+; ALL-LABEL: reserved_space:
+; O32:           addiu $sp, $sp, -24
+; O32:           sw $ra, 20($sp)
+; O32:           lw $ra, 20($sp)
+; O32:           addiu $sp, $sp, 24
+; Despite pointers being 32-bit wide on N32, the return pointer is saved as a
+; 64-bit pointer. I've yet to find a documentation reference for this quirk but
+; this behaviour matches GCC so I have considered it to be correct.
+; N32:           addiu $sp, $sp, -16
+; N32:           sd $ra, 8($sp)
+; N32:           ld $ra, 8($sp)
+; N32:           addiu $sp, $sp, 16
+; N64:           daddiu $sp, $sp, -16
+; N64:           sd $ra, 8($sp)
+; N64:           ld $ra, 8($sp)
+; N64:           daddiu $sp, $sp, 16
diff --git a/test/CodeGen/Mips/cconv/return-float.ll b/test/CodeGen/Mips/cconv/return-float.ll
new file mode 100644
index 0000000000000..28cf83d3efcf1
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-float.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=mips-linux-gnu -soft-float -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -mtriple=mipsel-linux-gnu -soft-float -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the float returns for all ABI's and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+
+; We only test Linux because other OS's use different relocations and I don't
+; know if this is correct.
+
+@float = global float zeroinitializer
+@double = global double zeroinitializer
+
+define float @retfloat() nounwind {
+entry:
+        %0 = load volatile float* @float
+        ret float %0
+}
+
+; ALL-LABEL: retfloat:
+; O32-DAG:           lui [[R1:\$[0-9]+]], %hi(float)
+; O32-DAG:           lw $2, %lo(float)([[R1]])
+; N32-DAG:           lui [[R1:\$[0-9]+]], %hi(float)
+; N32-DAG:           lw $2, %lo(float)([[R1]])
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG:           lw $2, 0([[R1]])
+
+define double @retdouble() nounwind {
+entry:
+        %0 = load volatile double* @double
+        ret double %0
+}
+
+; ALL-LABEL: retdouble:
+; O32-DAG:           lw $2, %lo(double)([[R1:\$[0-9]+]])
+; O32-DAG:           addiu [[R2:\$[0-9]+]], [[R1]], %lo(double)
+; O32-DAG:           lw $3, 4([[R2]])
+; N32-DAG:           ld $2, %lo(double)([[R1:\$[0-9]+]])
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG:           ld $2, 0([[R1]])
diff --git a/test/CodeGen/Mips/cconv/return-hard-float.ll b/test/CodeGen/Mips/cconv/return-hard-float.ll
new file mode 100644
index 0000000000000..3eb26fa9d24f7
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-hard-float.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=ALL --check-prefix=032FP64 %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=ALL --check-prefix=032FP64 %s
+
+; Test the float returns for all ABI's and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+
+; We only test Linux because other OS's use different relocations and I don't
+; know if this is correct.
+
+@float = global float zeroinitializer
+@double = global double zeroinitializer
+
+define float @retfloat() nounwind {
+entry:
+        %0 = load volatile float* @float
+        ret float %0
+}
+
+; ALL-LABEL: retfloat:
+; O32-DAG:           lui [[R1:\$[0-9]+]], %hi(float)
+; O32-DAG:           lwc1 $f0, %lo(float)([[R1]])
+; N32-DAG:           lui [[R1:\$[0-9]+]], %hi(float)
+; N32-DAG:           lwc1 $f0, %lo(float)([[R1]])
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG:           lwc1 $f0, 0([[R1]])
+
+define double @retdouble() nounwind {
+entry:
+        %0 = load volatile double* @double
+        ret double %0
+}
+
+; ALL-LABEL: retdouble:
+; O32-DAG:           ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
+; N32-DAG:           ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG:           ldc1 $f0, 0([[R1]])
+
+define { double, double } @retComplexDouble() #0 {
+  %retval = alloca { double, double }, align 8
+  %1 = load { double, double }* %retval
+  ret { double, double } %1
+}
+
+; ALL-LABEL: retComplexDouble:
+; 032FP64-DAG:      ldc1     $f0, 0($sp)
+; 032FP64-DAG:      ldc1     $f2, 8($sp)
diff --git a/test/CodeGen/Mips/cconv/return-hard-fp128.ll b/test/CodeGen/Mips/cconv/return-hard-fp128.ll
new file mode 100644
index 0000000000000..0da59efddd6c6
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-hard-fp128.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the fp128 returns for N32/N64 and all byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+;
+; O32 is not tested because long double is the same as double on O32.
+;
+@fp128 = global fp128 zeroinitializer
+
+define fp128 @retldouble() nounwind {
+entry:
+        %0 = load volatile fp128* @fp128
+        ret fp128 %0
+}
+
+; ALL-LABEL: retldouble:
+; N32-DAG:           ld [[R2:\$[0-9]+]], %lo(fp128)([[R1:\$[0-9]+]])
+; N32-DAG:           addiu [[R3:\$[0-9]+]], [[R1]], %lo(fp128)
+; N32-DAG:           ld [[R4:\$[0-9]+]], 8([[R3]])
+; N32-DAG:           dmtc1 [[R2]], $f0
+; N32-DAG:           dmtc1 [[R4]], $f2
+
+; N64-DAG:           ld [[R2:\$[0-9]+]], %got_disp(fp128)([[R1:\$[0-9]+]])
+; N64-DAG:           ld [[R3:\$[0-9]+]], 0([[R2]])
+; N64-DAG:           ld [[R4:\$[0-9]+]], 8([[R2]])
+; N64-DAG:           dmtc1 [[R3]], $f0
+; N64-DAG:           dmtc1 [[R4]], $f2
diff --git a/test/CodeGen/Mips/cconv/return.ll b/test/CodeGen/Mips/cconv/return.ll
new file mode 100644
index 0000000000000..76ce5e44c4aed
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the integer returns for all ABI's and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+
+; We only test Linux because other OS's use different relocations and I don't
+; know if this is correct.
+
+@byte = global i8 zeroinitializer
+@word = global i32 zeroinitializer
+@dword = global i64 zeroinitializer
+@float = global float zeroinitializer
+@double = global double zeroinitializer
+
+define i8 @reti8() nounwind {
+entry:
+        %0 = load volatile i8* @byte
+        ret i8 %0
+}
+
+; ALL-LABEL: reti8:
+; O32-DAG:           lui [[R1:\$[0-9]+]], %hi(byte)
+; O32-DAG:           lbu $2, %lo(byte)([[R1]])
+; N32-DAG:           lui [[R1:\$[0-9]+]], %hi(byte)
+; N32-DAG:           lbu $2, %lo(byte)([[R1]])
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(byte)($1)
+; N64-DAG:           lbu $2, 0([[R1]])
+
+define i32 @reti32() nounwind {
+entry:
+        %0 = load volatile i32* @word
+        ret i32 %0
+}
+
+; ALL-LABEL: reti32:
+; O32-DAG:           lui [[R1:\$[0-9]+]], %hi(word)
+; O32-DAG:           lw $2, %lo(word)([[R1]])
+; N32-DAG:           lui [[R1:\$[0-9]+]], %hi(word)
+; N32-DAG:           lw $2, %lo(word)([[R1]])
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(word)($1)
+; N64-DAG:           lw $2, 0([[R1]])
+
+define i64 @reti64() nounwind {
+entry:
+        %0 = load volatile i64* @dword
+        ret i64 %0
+}
+
+; ALL-LABEL: reti64:
+; On O32, we must use v0 and v1 for the return value
+; O32-DAG:           lw $2, %lo(dword)([[R1:\$[0-9]+]])
+; O32-DAG:           addiu [[R2:\$[0-9]+]], [[R1]], %lo(dword)
+; O32-DAG:           lw $3, 4([[R2]])
+; N32-DAG:           ld $2, %lo(dword)([[R1:\$[0-9]+]])
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(dword)([[R1:\$[0-9]+]])
+; N64-DAG:           ld $2, 0([[R1]])
diff --git a/test/CodeGen/Mips/cconv/stack-alignment.ll b/test/CodeGen/Mips/cconv/stack-alignment.ll
new file mode 100644
index 0000000000000..834033bc8da5c
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/stack-alignment.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the stack alignment for all ABI's and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+
+define void @local_bytes_1() nounwind {
+entry:
+        %0 = alloca i8
+        ret void
+}
+
+; ALL-LABEL: local_bytes_1:
+; O32:           addiu $sp, $sp, -8
+; O32:           addiu $sp, $sp, 8
+; N32:           addiu $sp, $sp, -16
+; N32:           addiu $sp, $sp, 16
+; N64:           addiu $sp, $sp, -16
+; N64:           addiu $sp, $sp, 16
diff --git a/test/CodeGen/Mips/cfi_offset.ll b/test/CodeGen/Mips/cfi_offset.ll
new file mode 100644
index 0000000000000..e23855bd65d28
--- /dev/null
+++ b/test/CodeGen/Mips/cfi_offset.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=mips -mattr=+o32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EB
+; RUN: llc -march=mipsel -mattr=+o32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EL
+; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EB
+; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EL
+; RUN: llc -march=mips -mattr=+o32,+fp64 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EB
+; RUN: llc -march=mipsel -mattr=+o32,+fp64 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EL
+
+@var = global double 0.0
+
+declare void @foo(...)
+
+define void @bar() {
+
+; CHECK-LABEL:  bar:
+
+; CHECK:  .cfi_def_cfa_offset 40
+; CHECK:  sdc1  $f22, 32($sp)
+; CHECK:  sdc1  $f20, 24($sp)
+; CHECK:  sw  $ra, 20($sp)
+; CHECK:  sw  $16, 16($sp)
+
+; CHECK-EB:  .cfi_offset 55, -8
+; CHECK-EB:  .cfi_offset 54, -4
+; CHECK-EB:  .cfi_offset 53, -16
+; CHECK-EB:  .cfi_offset 52, -12
+
+; CHECK-EL:  .cfi_offset 54, -8
+; CHECK-EL:  .cfi_offset 55, -4
+; CHECK-EL:  .cfi_offset 52, -16
+; CHECK-EL:  .cfi_offset 53, -12
+
+; CHECK:  .cfi_offset 31, -20
+; CHECK:  .cfi_offset 16, -24
+
+    %val1 = load volatile double* @var
+    %val2 = load volatile double* @var
+    call void (...)* @foo() nounwind
+    store volatile double %val1, double* @var
+    store volatile double %val2, double* @var
+    ret void
+}
diff --git a/test/CodeGen/Mips/ci2.ll b/test/CodeGen/Mips/ci2.ll
new file mode 100644
index 0000000000000..7187f0c758885
--- /dev/null
+++ b/test/CodeGen/Mips/ci2.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands   < %s | FileCheck %s -check-prefix=constisle
+
+@i = common global i32 0, align 4
+@b = common global i32 0, align 4
+@l = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+  store i32 305419896, i32* @i, align 4
+  %0 = load i32* @b, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 10, i32* @b, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 20, i32* @b, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  call void asm sideeffect ".space 100000", ""() #1, !srcloc !1
+  store i32 305419896, i32* @l, align 4
+  ret void
+; constisle: $CPI0_1:
+; constisle	.4byte	305419896               # 0x12345678
+; constisle	#APP
+; constisle	.space 100000
+; constisle	#NO_APP
+; constisle $CPI0_0:
+; constisle	.4byte	305419896               # 0x12345678
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!1 = metadata !{i32 103}
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index c24c5ac26ae1b..0c13fb1adfbe9 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -1,16 +1,43 @@
-; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=O32
-; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s -check-prefix=O32
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mips     -mcpu=mips32                 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc -march=mips     -mcpu=mips32 -regalloc=basic < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc -march=mips     -mcpu=mips32r2               < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc -march=mips     -mcpu=mips32r6               < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-CMP
+; RUN: llc -march=mips64el -mcpu=mips4                  < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc -march=mips64el -mcpu=mips64                 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc -march=mips64el -mcpu=mips64r6               < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-CMP
 
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global i32* null, align 4
 
-; O32-DAG:  lw $[[R0:[0-9]+]], %got(i3)
-; O32-DAG:  addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1)
-; O32:      movn $[[R0]], $[[R1]], ${{[0-9]+}}
-; N64-DAG:  ldr $[[R0:[0-9]+]]
-; N64-DAG:  ld $[[R1:[0-9]+]], %got_disp(i1)
-; N64:      movn $[[R0]], $[[R1]], ${{[0-9]+}}
+; ALL-LABEL: cmov1:
+
+; 32-CMOV-DAG:  lw $[[R0:[0-9]+]], %got(i3)
+; 32-CMOV-DAG:  addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1)
+; 32-CMOV-DAG:  movn $[[R0]], $[[R1]], $4
+; 32-CMOV-DAG:  lw $2, 0($[[R0]])
+
+; 32-CMP-DAG:   lw $[[R0:[0-9]+]], %got(i3)
+; 32-CMP-DAG:   addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1)
+; 32-CMP-DAG:   selnez $[[T0:[0-9]+]], $[[R1]], $4
+; 32-CMP-DAG:   seleqz $[[T1:[0-9]+]], $[[R0]], $4
+; 32-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+; 32-CMP-DAG:   lw $2, 0($[[T2]])
+
+; 64-CMOV-DAG:  ldr $[[R0:[0-9]+]]
+; 64-CMOV-DAG:  ld $[[R1:[0-9]+]], %got_disp(i1)
+; 64-CMOV-DAG:  movn $[[R0]], $[[R1]], $4
+
+; 64-CMP-DAG:   ld $[[R0:[0-9]+]], %got_disp(i3)(
+; 64-CMP-DAG:   daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(i1)
+; FIXME: This sll works around an implementation detail in the code generator
+;        (setcc's result is i32 so bits 32-63 are undefined). It's not really
+;        needed.
+; 64-CMP-DAG:   sll $[[CC:[0-9]+]], $4, 0
+; 64-CMP-DAG:   selnez $[[T0:[0-9]+]], $[[R1]], $[[CC]]
+; 64-CMP-DAG:   seleqz $[[T1:[0-9]+]], $[[R0]], $[[CC]]
+; 64-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+; 64-CMP-DAG:   ld $2, 0($[[T2]])
+
 define i32* @cmov1(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -22,14 +49,35 @@ entry:
 @c = global i32 1, align 4
 @d = global i32 0, align 4
 
-; O32-LABEL: cmov2:
-; O32: addiu $[[R1:[0-9]+]], ${{[a-z0-9]+}}, %got(d)
-; O32: addiu $[[R0:[0-9]+]], ${{[a-z0-9]+}}, %got(c)
-; O32: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
-; N64-LABEL: cmov2:
-; N64: daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d)
-; N64: daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got_disp(c)
-; N64: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
+; ALL-LABEL: cmov2:
+
+; 32-CMOV-DAG:  addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(d)
+; 32-CMOV-DAG:  addiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got(c)
+; 32-CMOV-DAG:  movn  $[[R1]], $[[R0]], $4
+; 32-CMOV-DAG:  lw $2, 0($[[R0]])
+
+; 32-CMP-DAG:   addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(d)
+; 32-CMP-DAG:   addiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got(c)
+; 32-CMP-DAG:   selnez $[[T0:[0-9]+]], $[[R0]], $4
+; 32-CMP-DAG:   seleqz $[[T1:[0-9]+]], $[[R1]], $4
+; 32-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+; 32-CMP-DAG:   lw $2, 0($[[T2]])
+
+; 64-CMOV:      daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d)
+; 64-CMOV:      daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got_disp(c)
+; 64-CMOV:      movn  $[[R1]], $[[R0]], $4
+
+; 64-CMP-DAG:   daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d)
+; 64-CMP-DAG:   daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got_disp(c)
+; FIXME: This sll works around an implementation detail in the code generator
+;        (setcc's result is i32 so bits 32-63 are undefined). It's not really
+;        needed.
+; 64-CMP-DAG:   sll $[[CC:[0-9]+]], $4, 0
+; 64-CMP-DAG:   selnez $[[T0:[0-9]+]], $[[R0]], $[[CC]]
+; 64-CMP-DAG:   seleqz $[[T1:[0-9]+]], $[[R1]], $[[CC]]
+; 64-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+; 64-CMP-DAG:   lw $2, 0($[[T2]])
+
 define i32 @cmov2(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -39,9 +87,28 @@ entry:
   ret i32 %cond
 }
 
-; O32-LABEL: cmov3:
-; O32: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: cmov3:
+
+; We won't check the result register since we can't know if the move is first
+; or last. We do know it will be either one of two registers so we can at least
+; check that.
+
+; 32-CMOV:      xori $[[R0:[0-9]+]], $4, 234
+; 32-CMOV:      movz ${{[26]}}, $5, $[[R0]]
+
+; 32-CMP-DAG:   xori $[[CC:[0-9]+]], $4, 234
+; 32-CMP-DAG:   seleqz $[[T0:[0-9]+]], $5, $[[CC]]
+; 32-CMP-DAG:   selnez $[[T1:[0-9]+]], $6, $[[CC]]
+; 32-CMP-DAG:   or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV:      xori $[[R0:[0-9]+]], $4, 234
+; 64-CMOV:      movz ${{[26]}}, $5, $[[R0]]
+
+; 64-CMP-DAG:   xori $[[CC:[0-9]+]], $4, 234
+; 64-CMP-DAG:   seleqz $[[T0:[0-9]+]], $5, $[[CC]]
+; 64-CMP-DAG:   selnez $[[T1:[0-9]+]], $6, $[[CC]]
+; 64-CMP-DAG:   or $2, $[[T0]], $[[T1]]
+
 define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %cmp = icmp eq i32 %a, 234
@@ -49,9 +116,69 @@ entry:
   ret i32 %cond
 }
 
-; N64-LABEL: cmov4:
-; N64: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
-; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: cmov3_ne:
+
+; We won't check the result register since we can't know if the move is first
+; or last. We do know it will be either one of two registers so we can at least
+; check that.
+
+; FIXME: Use xori instead of addiu+xor.
+; 32-CMOV:      addiu $[[R0:[0-9]+]], $zero, 234
+; 32-CMOV:      xor $[[R1:[0-9]+]], $4, $[[R0]]
+; 32-CMOV:      movn ${{[26]}}, $5, $[[R1]]
+
+; 32-CMP-DAG:   xori $[[CC:[0-9]+]], $4, 234
+; 32-CMP-DAG:   selnez $[[T0:[0-9]+]], $5, $[[CC]]
+; 32-CMP-DAG:   seleqz $[[T1:[0-9]+]], $6, $[[CC]]
+; 32-CMP-DAG:   or $2, $[[T0]], $[[T1]]
+
+; FIXME: Use xori instead of addiu+xor.
+; 64-CMOV:      addiu $[[R0:[0-9]+]], $zero, 234
+; 64-CMOV:      xor $[[R1:[0-9]+]], $4, $[[R0]]
+; 64-CMOV:      movn ${{[26]}}, $5, $[[R1]]
+
+; 64-CMP-DAG:   xori $[[CC:[0-9]+]], $4, 234
+; 64-CMP-DAG:   selnez $[[T0:[0-9]+]], $5, $[[CC]]
+; 64-CMP-DAG:   seleqz $[[T1:[0-9]+]], $6, $[[CC]]
+; 64-CMP-DAG:   or $2, $[[T0]], $[[T1]]
+
+define i32 @cmov3_ne(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+  %cmp = icmp ne i32 %a, 234
+  %cond = select i1 %cmp, i32 %b, i32 %c
+  ret i32 %cond
+}
+
+; ALL-LABEL: cmov4:
+
+; We won't check the result register since we can't know if the move is first
+; or last. We do know it will be one of two registers so we can at least check
+; that.
+
+; 32-CMOV-DAG: xori $[[R0:[0-9]+]], $4, 234
+; 32-CMOV-DAG: lw $[[R1:2]], 16($sp)
+; 32-CMOV-DAG: lw $[[R2:3]], 20($sp)
+; 32-CMOV-DAG: movz $[[R1]], $6, $[[R0]]
+; 32-CMOV-DAG: movz $[[R2]], $7, $[[R0]]
+
+; 32-CMP-DAG:  xori $[[R0:[0-9]+]], $4, 234
+; 32-CMP-DAG:  lw $[[R1:[0-9]+]], 16($sp)
+; 32-CMP-DAG:  lw $[[R2:[0-9]+]], 20($sp)
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $6, $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $7, $[[R0]]
+; 32-CMP-DAG:  selnez $[[T2:[0-9]+]], $[[R1]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T3:[0-9]+]], $[[R2]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T2]]
+; 32-CMP-DAG:  or $3, $[[T1]], $[[T3]]
+
+; 64-CMOV: xori $[[R0:[0-9]+]], $4, 234
+; 64-CMOV: movz ${{[26]}}, $5, $[[R0]]
+
+; 64-CMP-DAG:  xori $[[R0:[0-9]+]], $4, 234
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $5, $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $6, $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
 define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone {
 entry:
   %cmp = icmp eq i32 %a, 234
@@ -59,6 +186,47 @@ entry:
   ret i64 %cond
 }
 
+; ALL-LABEL: cmov4_ne:
+
+; We won't check the result register since we can't know if the move is first
+; or last. We do know it will be one of two registers so we can at least check
+; that.
+
+; FIXME: Use xori instead of addiu+xor.
+; 32-CMOV-DAG: addiu $[[R0:[0-9]+]], $zero, 234
+; 32-CMOV-DAG: xor $[[R1:[0-9]+]], $4, $[[R0]]
+; 32-CMOV-DAG: lw $[[R2:2]], 16($sp)
+; 32-CMOV-DAG: lw $[[R3:3]], 20($sp)
+; 32-CMOV-DAG: movn $[[R2]], $6, $[[R1]]
+; 32-CMOV-DAG: movn $[[R3]], $7, $[[R1]]
+
+; 32-CMP-DAG:  xori $[[R0:[0-9]+]], $4, 234
+; 32-CMP-DAG:  lw $[[R1:[0-9]+]], 16($sp)
+; 32-CMP-DAG:  lw $[[R2:[0-9]+]], 20($sp)
+; 32-CMP-DAG:  selnez $[[T0:[0-9]+]], $6, $[[R0]]
+; 32-CMP-DAG:  selnez $[[T1:[0-9]+]], $7, $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T2:[0-9]+]], $[[R1]], $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T3:[0-9]+]], $[[R2]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T2]]
+; 32-CMP-DAG:  or $3, $[[T1]], $[[T3]]
+
+; FIXME: Use xori instead of addiu+xor.
+; 64-CMOV: addiu $[[R0:[0-9]+]], $zero, 234
+; 64-CMOV: xor $[[R1:[0-9]+]], $4, $[[R0]]
+; 64-CMOV: movn ${{[26]}}, $5, $[[R1]]
+
+; 64-CMP-DAG:  xori $[[R0:[0-9]+]], $4, 234
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $5, $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $6, $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+define i64 @cmov4_ne(i32 %a, i64 %b, i64 %c) nounwind readnone {
+entry:
+  %cmp = icmp ne i32 %a, 234
+  %cond = select i1 %cmp, i64 %b, i64 %c
+  ret i64 %cond
+}
+
 ; slti and conditional move.
 ;
 ; Check that, pattern
@@ -67,74 +235,271 @@ entry:
 ;  (movz t, (setlt a, N + 1), f)
 ; if N + 1 fits in 16-bit.
 
-; O32-LABEL: slti0:
-; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: slti0:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: slti $[[R0:[0-9]+]], $4, 32767
+; 32-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  slti $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: slti $[[R0:[0-9]+]], $4, 32767
+; 64-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  slti $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @slti0(i32 %a) {
 entry:
   %cmp = icmp sgt i32 %a, 32766
-  %cond = select i1 %cmp, i32 3, i32 4
+  %cond = select i1 %cmp, i32 3, i32 5
   ret i32 %cond
 }
 
-; O32-LABEL: slti1:
-; O32: slt ${{[0-9]+}}
+; ALL-LABEL: slti1:
+
+; 32-CMOV-DAG: addiu $[[I7:[0-9]+]], $zero, 7
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: addiu $[[R1:[0-9]+]], $zero, 32767
+; 32-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG: movn $[[I5]], $[[I7]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I7:[0-9]+]], $zero, 7
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  addiu $[[I32767:[0-9]+]], $zero, 32767
+; 32-CMP-DAG:  slt $[[R0:[0-9]+]], $[[I32767]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 32-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I7]], $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I7:[0-9]+]], $zero, 7
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: addiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I5]], $[[I7]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I7:[0-9]+]], $zero, 7
+; 64-CMP-DAG:  addiu $[[I5:2]], $zero, 5
+; 64-CMP-DAG:  addiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMP-DAG:  slt $[[R0:[0-9]+]], $[[R1]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I7]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @slti1(i32 %a) {
 entry:
   %cmp = icmp sgt i32 %a, 32767
-  %cond = select i1 %cmp, i32 3, i32 4
+  %cond = select i1 %cmp, i32 7, i32 5
   ret i32 %cond
 }
 
-; O32-LABEL: slti2:
-; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: slti2:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: slti $[[R0:[0-9]+]], $4, -32768
+; 32-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  slti $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: slti $[[R0:[0-9]+]], $4, -32768
+; 64-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  slti $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @slti2(i32 %a) {
 entry:
   %cmp = icmp sgt i32 %a, -32769
-  %cond = select i1 %cmp, i32 3, i32 4
+  %cond = select i1 %cmp, i32 3, i32 5
   ret i32 %cond
 }
 
-; O32-LABEL: slti3:
-; O32: slt ${{[0-9]+}}
+; ALL-LABEL: slti3:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: lui $[[R1:[0-9]+]], 65535
+; 32-CMOV-DAG: ori $[[R1]], $[[R1]], 32766
+; 32-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG: movn $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  lui $[[IMM:[0-9]+]], 65535
+; 32-CMP-DAG:  ori $[[IMM]], $[[IMM]], 32766
+; 32-CMP-DAG:  slt $[[R0:[0-9]+]], $[[I32767]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 32-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: lui $[[R1:[0-9]+]], 65535
+; 64-CMOV-DAG: ori $[[R1]], $[[R1]], 32766
+; 64-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:2]], $zero, 5
+; 64-CMP-DAG:  lui $[[IMM:[0-9]+]], 65535
+; 64-CMP-DAG:  ori $[[IMM]], $[[IMM]], 32766
+; 64-CMP-DAG:  slt $[[R0:[0-9]+]], $[[IMM]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @slti3(i32 %a) {
 entry:
   %cmp = icmp sgt i32 %a, -32770
-  %cond = select i1 %cmp, i32 3, i32 4
+  %cond = select i1 %cmp, i32 3, i32 5
   ret i32 %cond
 }
 
 ; 64-bit patterns.
 
-; N64-LABEL: slti64_0:
-; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
-; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: slti64_0:
+
+; 32-CMOV-DAG:  slt $[[CC:[0-9]+]], $zero, $4
+; 32-CMOV-DAG:  addiu $[[I32766:[0-9]+]], $zero, 32766
+; 32-CMOV-DAG:  sltu $[[R1:[0-9]+]], $[[I32766]], $5
+; 32-CMOV-DAG:  movz $[[CC:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMOV-DAG:  addiu $[[I4:3]], $zero, 4
+; 32-CMOV-DAG:  movn $[[I4]], $[[I5]], $[[CC]]
+; 32-CMOV-DAG:  addiu $2, $zero, 0
+
+; 32-CMP-DAG:   slt $[[CC0:[0-9]+]], $zero, $4
+; 32-CMP-DAG:   addiu $[[I32766:[0-9]+]], $zero, 32766
+; 32-CMP-DAG:   sltu $[[CC1:[0-9]+]], $[[I32766]], $5
+; 32-CMP-DAG:   selnez $[[CC2:[0-9]+]], $[[CC0]], $4
+; 32-CMP-DAG:   seleqz $[[CC3:[0-9]+]], $[[CC1]], $4
+; 32-CMP:       or $[[CC:[0-9]+]], $[[CC3]], $[[CC2]]
+; 32-CMP-DAG:   addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:   addiu $[[I4:[0-9]+]], $zero, 4
+; 32-CMP-DAG:   seleqz $[[T0:[0-9]+]], $[[I4]], $[[CC]]
+; 32-CMP-DAG:   selnez $[[T1:[0-9]+]], $[[I5]], $[[CC]]
+; 32-CMP-DAG:   or $3, $[[T1]], $[[T0]]
+; 32-CMP-DAG:   addiu $2, $zero, 0
+
+; 64-CMOV-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMOV-DAG:  addiu $[[I4:2]], $zero, 4
+; 64-CMOV-DAG:  slti $[[R0:[0-9]+]], $4, 32767
+; 64-CMOV-DAG:  movz $[[I4]], $[[I5]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  addiu $[[I4:[0-9]+]], $zero, 4
+; 64-CMP-DAG:  slti $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by adding/subtracting the result of slti
+;        to/from one of the constants.
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I4]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i64 @slti64_0(i64 %a) {
 entry:
   %cmp = icmp sgt i64 %a, 32766
-  %conv = select i1 %cmp, i64 3, i64 4
+  %conv = select i1 %cmp, i64 5, i64 4
   ret i64 %conv
 }
 
-; N64-LABEL: slti64_1:
-; N64: slt ${{[0-9]+}}
+; ALL-LABEL: slti64_1:
+
+; 32-CMOV-DAG:  slt $[[CC:[0-9]+]], $zero, $4
+; 32-CMOV-DAG:  addiu $[[I32766:[0-9]+]], $zero, 32767
+; 32-CMOV-DAG:  sltu $[[R1:[0-9]+]], $[[I32766]], $5
+; 32-CMOV-DAG:  movz $[[CC:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMOV-DAG:  addiu $[[I4:3]], $zero, 4
+; 32-CMOV-DAG:  movn $[[I4]], $[[I5]], $[[CC]]
+; 32-CMOV-DAG:  addiu $2, $zero, 0
+
+; 32-CMP-DAG:   slt $[[CC0:[0-9]+]], $zero, $4
+; 32-CMP-DAG:   addiu $[[I32766:[0-9]+]], $zero, 32767
+; 32-CMP-DAG:   sltu $[[CC1:[0-9]+]], $[[I32766]], $5
+; 32-CMP-DAG:   selnez $[[CC2:[0-9]+]], $[[CC0]], $4
+; 32-CMP-DAG:   seleqz $[[CC3:[0-9]+]], $[[CC1]], $4
+; 32-CMP:       or $[[CC:[0-9]+]], $[[CC3]], $[[CC2]]
+; 32-CMP-DAG:   addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:   addiu $[[I4:[0-9]+]], $zero, 4
+; 32-CMP-DAG:   seleqz $[[T0:[0-9]+]], $[[I4]], $[[CC]]
+; 32-CMP-DAG:   selnez $[[T1:[0-9]+]], $[[I5]], $[[CC]]
+; 32-CMP-DAG:   or $3, $[[T1]], $[[T0]]
+; 32-CMP-DAG:   addiu $2, $zero, 0
+
+; 64-CMOV-DAG: daddiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMOV-DAG: daddiu $[[I4:2]], $zero, 4
+; 64-CMOV-DAG: daddiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I4]], $[[I5]], $[[R0]]
+
+; 64-CMP-DAG:  daddiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  daddiu $[[I4:2]], $zero, 4
+; 64-CMP-DAG:  daddiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMP-DAG:  slt $[[R0:[0-9]+]], $[[R1]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I4]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i64 @slti64_1(i64 %a) {
 entry:
   %cmp = icmp sgt i64 %a, 32767
-  %conv = select i1 %cmp, i64 3, i64 4
+  %conv = select i1 %cmp, i64 5, i64 4
   ret i64 %conv
 }
 
-; N64-LABEL: slti64_2:
-; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
-; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: slti64_2:
+
+; FIXME: The 32-bit versions of this test are too complicated to reasonably
+;        match at the moment. They do show some missing optimizations though
+;        such as:
+;           (movz $a, $b, (neg $c)) -> (movn $a, $b, $c)
+
+; 64-CMOV-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG:  addiu $[[I4:2]], $zero, 4
+; 64-CMOV-DAG:  slti $[[R0:[0-9]+]], $4, -32768
+; 64-CMOV-DAG:  movz $[[I4]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I4:[0-9]+]], $zero, 4
+; 64-CMP-DAG:  slti $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by adding/subtracting the result of slti
+;        to/from one of the constants.
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I4]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i64 @slti64_2(i64 %a) {
 entry:
@@ -143,56 +508,273 @@ entry:
   ret i64 %conv
 }
 
-; N64-LABEL: slti64_3:
-; N64: slt ${{[0-9]+}}
+; ALL-LABEL: slti64_3:
+
+; FIXME: The 32-bit versions of this test are too complicated to reasonably
+;        match at the moment. They do show some missing optimizations though
+;        such as:
+;           (movz $a, $b, (neg $c)) -> (movn $a, $b, $c)
+
+; 64-CMOV-DAG: daddiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMOV-DAG: daddiu $[[I4:2]], $zero, 4
+; 64-CMOV-DAG: daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, 32766
+; 64-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I4]], $[[I5]], $[[R0]]
+
+; 64-CMP-DAG:  daddiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  daddiu $[[I4:2]], $zero, 4
+; 64-CMP-DAG:  daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, 32766
+; 64-CMP-DAG:  slt $[[R0:[0-9]+]], $[[R1]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I4]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i64 @slti64_3(i64 %a) {
 entry:
   %cmp = icmp sgt i64 %a, -32770
-  %conv = select i1 %cmp, i64 3, i64 4
+  %conv = select i1 %cmp, i64 5, i64 4
   ret i64 %conv
 }
 
 ; sltiu instructions.
 
-; O32-LABEL: sltiu0:
-; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: sltiu0:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: sltiu $[[R0:[0-9]+]], $4, 32767
+; 32-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  sltiu $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: sltiu $[[R0:[0-9]+]], $4, 32767
+; 64-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  sltiu $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @sltiu0(i32 %a) {
 entry:
   %cmp = icmp ugt i32 %a, 32766
-  %cond = select i1 %cmp, i32 3, i32 4
+  %cond = select i1 %cmp, i32 3, i32 5
   ret i32 %cond
 }
 
-; O32-LABEL: sltiu1:
-; O32: sltu ${{[0-9]+}}
+; ALL-LABEL: sltiu1:
+
+; 32-CMOV-DAG: addiu $[[I7:[0-9]+]], $zero, 7
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: addiu $[[R1:[0-9]+]], $zero, 32767
+; 32-CMOV-DAG: sltu $[[R0:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG: movn $[[I5]], $[[I7]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I7:[0-9]+]], $zero, 7
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  addiu $[[I32767:[0-9]+]], $zero, 32767
+; 32-CMP-DAG:  sltu $[[R0:[0-9]+]], $[[I32767]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 32-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I7]], $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I7:[0-9]+]], $zero, 7
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: addiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMOV-DAG: sltu $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I5]], $[[I7]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I7:[0-9]+]], $zero, 7
+; 64-CMP-DAG:  addiu $[[I5:2]], $zero, 5
+; 64-CMP-DAG:  addiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMP-DAG:  sltu $[[R0:[0-9]+]], $[[R1]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I7]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @sltiu1(i32 %a) {
 entry:
   %cmp = icmp ugt i32 %a, 32767
-  %cond = select i1 %cmp, i32 3, i32 4
+  %cond = select i1 %cmp, i32 7, i32 5
   ret i32 %cond
 }
 
-; O32-LABEL: sltiu2:
-; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: sltiu2:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: sltiu $[[R0:[0-9]+]], $4, -32768
+; 32-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  sltiu $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: sltiu $[[R0:[0-9]+]], $4, -32768
+; 64-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  sltiu $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @sltiu2(i32 %a) {
 entry:
   %cmp = icmp ugt i32 %a, -32769
-  %cond = select i1 %cmp, i32 3, i32 4
+  %cond = select i1 %cmp, i32 3, i32 5
   ret i32 %cond
 }
 
-; O32-LABEL: sltiu3:
-; O32: sltu ${{[0-9]+}}
+; ALL-LABEL: sltiu3:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: lui $[[R1:[0-9]+]], 65535
+; 32-CMOV-DAG: ori $[[R1]], $[[R1]], 32766
+; 32-CMOV-DAG: sltu $[[R0:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG: movn $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  lui $[[IMM:[0-9]+]], 65535
+; 32-CMP-DAG:  ori $[[IMM]], $[[IMM]], 32766
+; 32-CMP-DAG:  sltu $[[R0:[0-9]+]], $[[I32767]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 32-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: lui $[[R1:[0-9]+]], 65535
+; 64-CMOV-DAG: ori $[[R1]], $[[R1]], 32766
+; 64-CMOV-DAG: sltu $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:2]], $zero, 5
+; 64-CMP-DAG:  lui $[[IMM:[0-9]+]], 65535
+; 64-CMP-DAG:  ori $[[IMM]], $[[IMM]], 32766
+; 64-CMP-DAG:  sltu $[[R0:[0-9]+]], $[[IMM]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @sltiu3(i32 %a) {
 entry:
   %cmp = icmp ugt i32 %a, -32770
-  %cond = select i1 %cmp, i32 3, i32 4
+  %cond = select i1 %cmp, i32 3, i32 5
   ret i32 %cond
 }
+
+; Check if
+;  (select (setxx a, N), x, x-1) or
+;  (select (setxx a, N), x-1, x)
+; doesn't generate conditional moves
+; for constant operands whose difference is |1|
+
+define i32 @slti4(i32 %a) nounwind readnone {
+  %1 = icmp slt i32 %a, 7
+  %2 = select i1 %1, i32 4, i32 3
+  ret i32 %2
+}
+
+; ALL-LABEL: slti4:
+
+; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMOV-DAG: addiu $2, [[R1]], 3
+; 32-CMOV-NOT: movn
+
+; 32-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMP-DAG:  addiu $2, [[R1]], 3
+; 32-CMP-NOT:  seleqz
+; 32-CMP-NOT:  selnez
+
+; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMOV-DAG: addiu $2, [[R1]], 3
+; 64-CMOV-NOT: movn
+
+; 64-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMP-DAG:  addiu $2, [[R1]], 3
+; 64-CMP-NOT:  seleqz
+; 64-CMP-NOT:  selnez
+
+define i32 @slti5(i32 %a) nounwind readnone {
+  %1 = icmp slt i32 %a, 7
+  %2 = select i1 %1, i32 -3, i32 -4
+  ret i32 %2
+}
+
+; ALL-LABEL: slti5:
+
+; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMOV-DAG: addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4
+; 32-CMOV-NOT: movn
+
+; 32-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMP-DAG:  addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4
+; 32-CMP-NOT:  seleqz
+; 32-CMP-NOT:  selnez
+
+; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMOV-DAG: addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4
+; 64-CMOV-NOT: movn
+
+; 64-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMP-DAG:  addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4
+; 64-CMP-NOT:  seleqz
+; 64-CMP-NOT:  selnez
+
+define i32 @slti6(i32 %a) nounwind readnone {
+  %1 = icmp slt i32 %a, 7
+  %2 = select i1 %1, i32 3, i32 4
+  ret i32 %2
+}
+
+; ALL-LABEL: slti6:
+
+; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMOV-DAG: xori [[R1]], [[R1]], 1
+; 32-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
+; 32-CMOV-NOT: movn
+
+; 32-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMP-DAG:  xori [[R1]], [[R1]], 1
+; 32-CMP-DAG:  addiu [[R2:\$[0-9]+]], [[R1]], 3
+; 32-CMP-NOT:  seleqz
+; 32-CMP-NOT:  selnez
+
+; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMOV-DAG: xori [[R1]], [[R1]], 1
+; 64-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
+; 64-CMOV-NOT: movn
+
+; 64-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMP-DAG:  xori [[R1]], [[R1]], 1
+; 64-CMP-DAG:  addiu [[R2:\$[0-9]+]], [[R1]], 3
+; 64-CMP-NOT:  seleqz
+; 64-CMP-NOT:  selnez
diff --git a/test/CodeGen/Mips/const-mult.ll b/test/CodeGen/Mips/const-mult.ll
index 8c0cbe3396b75..186202141dcbd 100644
--- a/test/CodeGen/Mips/const-mult.ll
+++ b/test/CodeGen/Mips/const-mult.ll
@@ -1,6 +1,5 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=CHECK
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=CHECK
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=CHECK64
+; RUN: llc -march=mips64el < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK64
 
 ; CHECK-LABEL: mul5_32:
 ; CHECK: sll $[[R0:[0-9]+]], $4, 2
diff --git a/test/CodeGen/Mips/const4a.ll b/test/CodeGen/Mips/const4a.ll
index 0332327cec69d..b4c509fcd8c03 100644
--- a/test/CodeGen/Mips/const4a.ll
+++ b/test/CodeGen/Mips/const4a.ll
@@ -15,14 +15,14 @@ define void @t() #0 {
 entry:
   store i32 -559023410, i32* @i, align 4
   %0 = load i32* @b, align 4
-; no-load-relax	lw	${{[0-9]+}}, $CPI0_1	# 16 bit inst
+; no-load-relax:	lw	${{[0-9]+}}, $CPI0_1	# 16 bit inst
   %tobool = icmp ne i32 %0, 0
   br i1 %tobool, label %if.then, label %if.else
 ; no-load-relax:	beqz	${{[0-9]+}}, $BB0_3
 ; no-load-relax:	lw	${{[0-9]+}}, %call16(foo)(${{[0-9]+}})
 ; no-load-relax:	b	$BB0_4
 ; no-load-relax:	.align	2
-; no-load-relax: $CPI0_0:
+; no-load-relax: $CPI0_1:
 ; no-load-relax:	.4byte	3735943886
 ; no-load-relax: $BB0_3:
 ; no-load-relax:	lw	${{[0-9]+}}, %call16(goo)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/const6.ll b/test/CodeGen/Mips/const6.ll
index 20cdc09f7be18..3f02ab907e1e2 100644
--- a/test/CodeGen/Mips/const6.ll
+++ b/test/CodeGen/Mips/const6.ll
@@ -27,7 +27,7 @@ entry:
 ; no-load-relax:	jalrc 	${{[0-9]+}}
 ; no-load-relax:	b	$BB0_2
 ; no-load-relax:	.align	2
-; no-load-relax: $CPI0_0:
+; no-load-relax: $CPI0_1:
 ; no-load-relax:	.4byte	3735943886
 ; no-load-relax: $BB0_2:
 
diff --git a/test/CodeGen/Mips/const6a.ll b/test/CodeGen/Mips/const6a.ll
index 8b402accc7de6..d34239058734c 100644
--- a/test/CodeGen/Mips/const6a.ll
+++ b/test/CodeGen/Mips/const6a.ll
@@ -19,7 +19,7 @@ entry:
 ; load-relax: $CPI0_0:
 ; load-relax:	.4byte	3735943886
 ; load-relax:	.end	t
-  call void asm sideeffect ".space 40000", ""() #1, !srcloc !1
+  call void asm sideeffect ".space 10000", ""() #1, !srcloc !1
   ret void
 }
 
diff --git a/test/CodeGen/Mips/countleading.ll b/test/CodeGen/Mips/countleading.ll
new file mode 100644
index 0000000000000..6e63cff123cf9
--- /dev/null
+++ b/test/CodeGen/Mips/countleading.ll
@@ -0,0 +1,90 @@
+; RUN: llc -march=mipsel -mcpu=mips32   < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32-R1-R2 -check-prefix=MIPS32-GT-R1 %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32-R1-R2 -check-prefix=MIPS32-GT-R1 %s
+; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32-R6    -check-prefix=MIPS32-GT-R1 %s
+; RUN: llc -march=mips64el -mcpu=mips4    < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS4 %s
+; RUN: llc -march=mips64el -mcpu=mips64   < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64-GT-R1 %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64-GT-R1 %s
+; R!N: llc -march=mips64el -mcpu=mips64r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64-GT-R1 %s
+
+; Prefixes:
+;   ALL      - All
+;   MIPS32-GT-R1 - MIPS64r1 and above (does not include MIPS64's)
+;   MIPS64-GT-R1 - MIPS64r1 and above
+
+define i32 @ctlz_i32(i32 %X) nounwind readnone {
+entry:
+; ALL-LABEL: ctlz_i32:
+
+; MIPS4-NOT:     clz
+
+; MIPS32-GT-R1:  clz $2, $4
+
+; MIPS64-GT-R1:  clz $2, $4
+
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
+  ret i32 %tmp1
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+
+define i32 @ctlo_i32(i32 %X) nounwind readnone {
+entry:
+; ALL-LABEL: ctlo_i32:
+
+; MIPS4-NOT:     clo
+
+; MIPS32-GT-R1:  clo $2, $4
+
+; MIPS64-GT-R1:  clo $2, $4
+
+  %neg = xor i32 %X, -1
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
+  ret i32 %tmp1
+}
+
+define i64 @ctlz_i64(i64 %X) nounwind readnone {
+entry:
+; ALL-LABEL: ctlz_i64:
+
+; MIPS4-NOT:     dclz
+
+; MIPS32-GT-R1-DAG: clz $[[R0:[0-9]+]], $4
+; MIPS32-GT-R1-DAG: clz $[[R1:[0-9]+]], $5
+; MIPS32-GT-R1-DAG: addiu $[[R2:2+]], $[[R0]], 32
+; MIPS32-R1-R2-DAG: movn $[[R2]], $[[R1]], $5
+; MIPS32-R6-DAG:    seleqz $[[R5:[0-9]+]], $[[R2]], $5
+; MIPS32-R6-DAG:    selnez $[[R6:[0-9]+]], $[[R1]], $5
+; MIPS32-R6-DAG:    or $2, $[[R6]], $[[R5]]
+; MIPS32-GT-R1-DAG: addiu $3, $zero, 0
+
+; MIPS64-GT-R1:  dclz $2, $4
+
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
+  ret i64 %tmp1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i64 @ctlo_i64(i64 %X) nounwind readnone {
+entry:
+; ALL-LABEL: ctlo_i64:
+
+; MIPS4-NOT:     dclo
+
+; MIPS32-GT-R1-DAG: clo $[[R0:[0-9]+]], $4
+; MIPS32-GT-R1-DAG: clo $[[R1:[0-9]+]], $5
+; MIPS32-GT-R1-DAG: addiu $[[R2:2+]], $[[R0]], 32
+; MIPS32-GT-R1-DAG: addiu $[[R3:[0-9]+]], $zero, -1
+; MIPS32-GT-R1-DAG: xor $[[R4:[0-9]+]], $5, $[[R3]]
+; MIPS32-R1-R2-DAG: movn $[[R2]], $[[R1]], $[[R4]]
+; MIPS32-R6-DAG:    selnez $[[R5:[0-9]+]], $[[R1]], $[[R4]]
+; MIPS32-R6-DAG:    seleqz $[[R6:[0-9]+]], $[[R2]], $[[R4]]
+; MIPS32-R6-DAG:    or $2, $[[R5]], $[[R6]]
+; MIPS32-GT-R1-DAG: addiu $3, $zero, 0
+
+; MIPS64-GT-R1:  dclo $2, $4
+
+  %neg = xor i64 %X, -1
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
+  ret i64 %tmp1
+}
diff --git a/test/CodeGen/Mips/ctlz.ll b/test/CodeGen/Mips/ctlz.ll
index 2ddb72755ac8e..1f871664a6cf4 100644
--- a/test/CodeGen/Mips/ctlz.ll
+++ b/test/CodeGen/Mips/ctlz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=static
 
 @x = global i32 28912, align 4
 @y = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll
index b631c3b279f4d..97f8360444067 100644
--- a/test/CodeGen/Mips/divrem.ll
+++ b/test/CodeGen/Mips/divrem.ll
@@ -1,77 +1,223 @@
-; RUN: llc -march=mips -verify-machineinstrs < %s |\
-; RUN: FileCheck %s -check-prefix=TRAP
-; RUN: llc -march=mips -mno-check-zero-division < %s |\
-; RUN: FileCheck %s -check-prefix=NOCHECK
+; RUN: llc -march=mips   -mcpu=mips32   -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC32 -check-prefix=ACC32-TRAP
+; RUN: llc -march=mips   -mcpu=mips32r2 -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC32 -check-prefix=ACC32-TRAP
+; RUN: llc -march=mips   -mcpu=mips32r6 -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=GPR32-TRAP
+; RUN: llc -march=mips64 -mcpu=mips64   -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC64 -check-prefix=ACC64-TRAP
+; RUN: llc -march=mips64 -mcpu=mips64r2 -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC64 -check-prefix=ACC64-TRAP
+; RUN: llc -march=mips64 -mcpu=mips64r6 -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=GPR64-TRAP
 
-; TRAP-LABEL: sdiv1:
-; TRAP: div $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; TRAP: teq $[[R0]], $zero, 7
-; TRAP: mflo
+; RUN: llc -march=mips   -mcpu=mips32   -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC32 -check-prefix=NOCHECK
+; RUN: llc -march=mips   -mcpu=mips32r2 -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC32 -check-prefix=NOCHECK
+; RUN: llc -march=mips   -mcpu=mips32r6 -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=NOCHECK
+; RUN: llc -march=mips64 -mcpu=mips64   -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC64 -check-prefix=NOCHECK
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC64 -check-prefix=NOCHECK
+; RUN: llc -march=mips64 -mcpu=mips64r6 -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=NOCHECK
 
-; NOCHECK-LABEL: sdiv1:
-; NOCHECK-NOT: teq
-; NOCHECK: .end sdiv1
+; FileCheck Prefixes:
+;   ALL - All targets
+;   ACC32 - Accumulator based multiply/divide on 32-bit targets
+;   ACC64 - Same as ACC32 but only for 64-bit targets
+;   GPR32 - GPR based multiply/divide on 32-bit targets
+;   GPR64 - Same as GPR32 but only for 64-bit targets
+;   ACC32-TRAP - Same as TRAP and ACC32 combined
+;   ACC64-TRAP - Same as TRAP and ACC64 combined
+;   GPR32-TRAP - Same as TRAP and GPR32 combined
+;   GPR64-TRAP - Same as TRAP and GPR64 combined
+;   NOCHECK - Division by zero will not be detected
 
 @g0 = common global i32 0, align 4
 @g1 = common global i32 0, align 4
 
 define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
+; ALL-LABEL: sdiv1:
+
+; ACC32:         div $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+
+; ACC64:         div $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR32:         div $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         div $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC32:         mflo $2
+; ACC64:         mflo $2
+
+; ALL: .end sdiv1
+
   %div = sdiv i32 %a0, %a1
   ret i32 %div
 }
 
-; TRAP-LABEL: srem1:
-; TRAP: div $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; TRAP: teq $[[R0]], $zero, 7
-; TRAP: mfhi
-
 define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
+; ALL-LABEL: srem1:
+
+; ACC32:         div $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+
+; ACC64:         div $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR32:         mod $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         mod $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC32:         mfhi $2
+; ACC64:         mfhi $2
+
+; ALL: .end srem1
+
   %rem = srem i32 %a0, %a1
   ret i32 %rem
 }
 
-; TRAP-LABEL: udiv1:
-; TRAP: divu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; TRAP: teq $[[R0]], $zero, 7
-; TRAP: mflo
-
 define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
+; ALL-LABEL: udiv1:
+
+; ACC32:         divu $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR32:         divu $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         divu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC32:         mflo $2
+; ACC64:         mflo $2
+
+; ALL: .end udiv1
   %div = udiv i32 %a0, %a1
   ret i32 %div
 }
 
-; TRAP-LABEL: urem1:
-; TRAP: divu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; TRAP: teq $[[R0]], $zero, 7
-; TRAP: mfhi
-
 define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
+; ALL-LABEL: urem1:
+
+; ACC32:         divu $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR32:         modu $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         modu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC32:         mfhi $2
+; ACC64:         mfhi $2
+
+; ALL: .end urem1
+
   %rem = urem i32 %a0, %a1
   ret i32 %rem
 }
 
-; TRAP: div $zero,
 define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
 entry:
+; ALL-LABEL: sdivrem1:
+
+; ACC32:         div $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC32:         mflo $2
+; ACC32:         mfhi $[[R0:[0-9]+]]
+; ACC32:         sw $[[R0]], 0(${{[0-9]+}})
+
+; ACC64:         div $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC64:         mflo $2
+; ACC64:         mfhi $[[R0:[0-9]+]]
+; ACC64:         sw $[[R0]], 0(${{[0-9]+}})
+
+; GPR32:         mod $[[R0:[0-9]+]], $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR32:         sw $[[R0]], 0(${{[0-9]+}})
+; GPR32-DAG:     div $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         mod $[[R0:[0-9]+]], $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR64:         sw $[[R0]], 0(${{[0-9]+}})
+; GPR64-DAG:     div $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; ALL: .end sdivrem1
+
   %rem = srem i32 %a0, %a1
   store i32 %rem, i32* %r, align 4
   %div = sdiv i32 %a0, %a1
   ret i32 %div
 }
 
-; TRAP: divu $zero,
 define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
 entry:
+; ALL-LABEL: udivrem1:
+
+; ACC32:         divu $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC32:         mflo $2
+; ACC32:         mfhi $[[R0:[0-9]+]]
+; ACC32:         sw $[[R0]], 0(${{[0-9]+}})
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC64:         mflo $2
+; ACC64:         mfhi $[[R0:[0-9]+]]
+; ACC64:         sw $[[R0]], 0(${{[0-9]+}})
+
+; GPR32:         modu $[[R0:[0-9]+]], $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR32:         sw $[[R0]], 0(${{[0-9]+}})
+; GPR32-DAG:     divu $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; GPR64:         modu $[[R0:[0-9]+]], $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR64:         sw $[[R0]], 0(${{[0-9]+}})
+; GPR64-DAG:     divu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; ALL: .end udivrem1
+
   %rem = urem i32 %a0, %a1
   store i32 %rem, i32* %r, align 4
   %div = udiv i32 %a0, %a1
   ret i32 %div
 }
 
+; FIXME: It's not clear what this is supposed to test.
 define i32 @killFlags() {
 entry:
   %0 = load i32* @g0, align 4
@@ -79,3 +225,164 @@ entry:
   %div = sdiv i32 %0, %1
   ret i32 %div
 }
+
+define i64 @sdiv2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; ALL-LABEL: sdiv2:
+
+; ACC32:         lw $25, %call16(__divdi3)(
+; ACC32:         jalr $25
+
+; ACC64:         ddiv $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR64:         ddiv $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC64:         mflo $2
+
+; ALL: .end sdiv2
+
+  %div = sdiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @srem2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; ALL-LABEL: srem2:
+
+; ACC32:         lw $25, %call16(__moddi3)(
+; ACC32:         jalr $25
+
+; ACC64:         div $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR64:         dmod $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC64:         mfhi $2
+
+; ALL: .end srem2
+
+  %rem = srem i64 %a0, %a1
+  ret i64 %rem
+}
+
+define i64 @udiv2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; ALL-LABEL: udiv2:
+
+; ACC32:         lw $25, %call16(__udivdi3)(
+; ACC32:         jalr $25
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR64:         ddivu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC64:         mflo $2
+
+; ALL: .end udiv2
+  %div = udiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @urem2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; ALL-LABEL: urem2:
+
+; ACC32:         lw $25, %call16(__umoddi3)(
+; ACC32:         jalr $25
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR64:         dmodu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC64:         mfhi $2
+
+; ALL: .end urem2
+
+  %rem = urem i64 %a0, %a1
+  ret i64 %rem
+}
+
+define i64 @sdivrem2(i64 %a0, i64 %a1, i64* nocapture %r) nounwind {
+entry:
+; ALL-LABEL: sdivrem2:
+
+; sdivrem2 is too complex to effectively check. We can at least check for the
+; calls though.
+; ACC32:         lw $25, %call16(__moddi3)(
+; ACC32:         jalr $25
+; ACC32:         lw $25, %call16(__divdi3)(
+; ACC32:         jalr $25
+
+; ACC64:         ddiv $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC64:         mflo $2
+; ACC64:         mfhi $[[R0:[0-9]+]]
+; ACC64:         sd $[[R0]], 0(${{[0-9]+}})
+
+; GPR64:         dmod $[[R0:[0-9]+]], $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR64:         sd $[[R0]], 0(${{[0-9]+}})
+
+; GPR64-DAG:     ddiv $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; ALL: .end sdivrem2
+
+  %rem = srem i64 %a0, %a1
+  store i64 %rem, i64* %r, align 8
+  %div = sdiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @udivrem2(i64 %a0, i64 %a1, i64* nocapture %r) nounwind {
+entry:
+; ALL-LABEL: udivrem2:
+
+; udivrem2 is too complex to effectively check. We can at least check for the
+; calls though.
+; ACC32:         lw $25, %call16(__umoddi3)(
+; ACC32:         jalr $25
+; ACC32:         lw $25, %call16(__udivdi3)(
+; ACC32:         jalr $25
+
+; ACC64:         ddivu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC64:         mflo $2
+; ACC64:         mfhi $[[R0:[0-9]+]]
+; ACC64:         sd $[[R0]], 0(${{[0-9]+}})
+
+; GPR64:         dmodu $[[R0:[0-9]+]], $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR64:         sd $[[R0]], 0(${{[0-9]+}})
+
+; GPR64-DAG:     ddivu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; ALL: .end udivrem2
+
+  %rem = urem i64 %a0, %a1
+  store i64 %rem, i64* %r, align 8
+  %div = udiv i64 %a0, %a1
+  ret i64 %div
+}
diff --git a/test/CodeGen/Mips/dsp-r1.ll b/test/CodeGen/Mips/dsp-r1.ll
index acdd17d1afd43..fbd9703996409 100644
--- a/test/CodeGen/Mips/dsp-r1.ll
+++ b/test/CodeGen/Mips/dsp-r1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mattr=+dsp < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+dsp < %s | FileCheck %s
 
 define i32 @test__builtin_mips_extr_w1(i32 %i0, i32, i64 %a0) nounwind {
 entry:
diff --git a/test/CodeGen/Mips/eh-dwarf-cfa.ll b/test/CodeGen/Mips/eh-dwarf-cfa.ll
index 3a21332b5c5a3..6554974bf849b 100644
--- a/test/CodeGen/Mips/eh-dwarf-cfa.ll
+++ b/test/CodeGen/Mips/eh-dwarf-cfa.ll
@@ -1,4 +1,6 @@
 ; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | \
+; RUN:      FileCheck %s -check-prefix=CHECK-MIPS64
 ; RUN: llc -march=mips64el -mcpu=mips64 < %s | \
 ; RUN:      FileCheck %s -check-prefix=CHECK-MIPS64
 
diff --git a/test/CodeGen/Mips/eh-return32.ll b/test/CodeGen/Mips/eh-return32.ll
index c3003b34b162e..748050c4d34b7 100644
--- a/test/CodeGen/Mips/eh-return32.ll
+++ b/test/CodeGen/Mips/eh-return32.ll
@@ -1,4 +1,6 @@
-; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32   -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mipsel -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mipsel -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=R6
 
 declare void @llvm.eh.return.i32(i32, i8*)
 declare void @foo(...)
@@ -9,7 +11,7 @@ entry:
   call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
   unreachable
 
-; CHECK:        f1
+; CHECK:    f1:
 ; CHECK:        addiu   $sp, $sp, -[[spoffset:[0-9]+]]
 
 ; check that $a0-$a3 are saved on stack.
@@ -41,7 +43,8 @@ entry:
 ; CHECK:        addiu   $sp, $sp, [[spoffset]]
 ; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
-; CHECK:        jr      $ra
+; NOT-R6:       jr      $ra # <MCInst #{{[0-9]+}} JR
+; R6:           jr      $ra # <MCInst #{{[0-9]+}} JALR
 ; CHECK:        addu    $sp, $sp, $3
 }
 
@@ -50,7 +53,7 @@ entry:
   call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
   unreachable
 
-; CHECK:        f2
+; CHECK:    f2:
 ; CHECK:        addiu   $sp, $sp, -[[spoffset:[0-9]+]]
 
 ; check that $a0-$a3 are saved on stack.
@@ -80,6 +83,7 @@ entry:
 ; CHECK:        addiu   $sp, $sp, [[spoffset]]
 ; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
-; CHECK:        jr      $ra
+; NOT-R6:       jr      $ra # <MCInst #{{[0-9]+}} JR
+; R6:           jr      $ra # <MCInst #{{[0-9]+}} JALR
 ; CHECK:        addu    $sp, $sp, $3
 }
diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll
index 32fc5e61899a7..74a43231598cd 100644
--- a/test/CodeGen/Mips/eh-return64.ll
+++ b/test/CodeGen/Mips/eh-return64.ll
@@ -1,4 +1,7 @@
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips4    -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mips64el -mcpu=mips64   -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mips64el -mcpu=mips64r2 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mips64el -mcpu=mips64r6 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=R6
 
 declare void @llvm.eh.return.i64(i64, i8*)
 declare void @foo(...)
@@ -9,7 +12,7 @@ entry:
   call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
   unreachable
 
-; CHECK:        f1
+; CHECK:    f1:
 ; CHECK:        daddiu  $sp, $sp, -[[spoffset:[0-9]+]]
 
 ; check that $a0-$a3 are saved on stack.
@@ -41,9 +44,9 @@ entry:
 ; CHECK:        daddiu  $sp, $sp, [[spoffset]]
 ; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
-; CHECK:        jr      $ra
+; NOT-R6:       jr      $ra # <MCInst #{{[0-9]+}} JR
+; R6:           jr      $ra # <MCInst #{{[0-9]+}} JALR
 ; CHECK:        daddu   $sp, $sp, $3
-
 }
 
 define void @f2(i64 %offset, i8* %handler) {
@@ -51,7 +54,7 @@ entry:
   call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
   unreachable
 
-; CHECK:        f2
+; CHECK:    f2:
 ; CHECK:        .cfi_startproc
 ; CHECK:        daddiu  $sp, $sp, -[[spoffset:[0-9]+]]
 ; CHECK:        .cfi_def_cfa_offset [[spoffset]]
@@ -83,7 +86,8 @@ entry:
 ; CHECK:        daddiu  $sp, $sp, [[spoffset]]
 ; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
-; CHECK:        jr      $ra
+; NOT-R6:       jr      $ra # <MCInst #{{[0-9]+}} JR
+; R6:           jr      $ra # <MCInst #{{[0-9]+}} JALR
 ; CHECK:        daddu   $sp, $sp, $3
 ; CHECK:        .cfi_endproc
 }
diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll
new file mode 100644
index 0000000000000..e78497a9521e9
--- /dev/null
+++ b/test/CodeGen/Mips/ehframe-indirect.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck %s
+
+define i32 @main() {
+; CHECK: .cfi_startproc
+; CHECK: .cfi_personality 128, DW.ref.__gxx_personality_v0
+
+entry:
+  invoke void @foo() to label %cont unwind label %lpad
+; CHECK: foo
+; CHECK: jalr
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8*
+    bitcast (i32 (...)* @__gxx_personality_v0 to i8*) catch i8* null
+  ret i32 0
+
+cont:
+  ret i32 0
+}
+; CHECK: .cfi_endproc
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @foo()
+
+; CHECK: .hidden DW.ref.__gxx_personality_v0
+; CHECK: .weak DW.ref.__gxx_personality_v0
+; CHECK: .section .data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
+; CHECK: .align 2
+; CHECK: .type DW.ref.__gxx_personality_v0,@object
+; CHECK: .size DW.ref.__gxx_personality_v0, 4
+; CHECK: DW.ref.__gxx_personality_v0:
+; CHECK: .4byte __gxx_personality_v0
diff --git a/test/CodeGen/Mips/elf_eflags.ll b/test/CodeGen/Mips/elf_eflags.ll
new file mode 100644
index 0000000000000..00d8584fdad2c
--- /dev/null
+++ b/test/CodeGen/Mips/elf_eflags.ll
@@ -0,0 +1,86 @@
+; This tests ELF EFLAGS setting with direct object.
+; When the assembler is ready a .s file for it will
+; be created.
+
+; Non-shared (static) is the absence of pic and or cpic.
+
+; EF_MIPS_NOREORDER (0x00000001) is always on by default currently
+; EF_MIPS_PIC (0x00000002)
+; EF_MIPS_CPIC (0x00000004) - See note below
+; EF_MIPS_ABI2 (0x00000020) - n32 not tested yet
+; EF_MIPS_ARCH_32 (0x50000000)
+; EF_MIPS_ARCH_64 (0x60000000)
+; EF_MIPS_ARCH_32R2 (0x70000000)
+; EF_MIPS_ARCH_64R2 (0x80000000)
+
+; Note that EF_MIPS_CPIC is set by -mabicalls which is the default on Linux
+; TODO need to support -mno-abicalls
+
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE32 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=CHECK-LE32_PIC %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE32R2 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 %s -o - | FileCheck -check-prefix=CHECK-LE32R2_PIC %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE32R2-MICROMIPS %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | FileCheck -check-prefix=CHECK-LE32R2-MICROMIPS_PIC %s
+
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE64 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips4 %s -o - | FileCheck -check-prefix=CHECK-LE64_PIC %s
+
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE64 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | FileCheck -check-prefix=CHECK-LE64_PIC %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE64R2 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64r2 %s -o - | FileCheck -check-prefix=CHECK-LE64R2_PIC %s
+
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic %s -o - | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s
+
+; 32(R1) bit with NO_REORDER and static
+; CHECK-LE32: .abicalls
+; CHECK-LE32: .option	pic0
+; CHECK-LE32: .set	noreorder
+;
+; 32(R1) bit with NO_REORDER and PIC
+; CHECK-LE32_PIC: .abicalls
+; CHECK-LE32_PIC: .set	noreorder
+;
+; 32R2 bit with NO_REORDER and static
+; CHECK-LE32R2: .abicalls
+; CHECK-LE32R2: .option pic0
+; CHECK-LE32R2: .set noreorder
+;
+; 32R2 bit with NO_REORDER and PIC
+; CHECK-LE32R2_PIC: .abicalls
+; CHECK-LE32R2_PIC: .set noreorder
+;
+; 32R2 bit MICROMIPS with NO_REORDER and static
+; CHECK-LE32R2-MICROMIPS: .abicalls
+; CHECK-LE32R2-MICROMIPS: .option pic0
+; CHECK-LE32R2-MICROMIPS: .set	micromips
+;
+; 32R2 bit MICROMIPS with NO_REORDER and PIC
+; CHECK-LE32R2-MICROMIPS_PIC: .abicalls
+; CHECK-LE32R2-MICROMIPS_PIC: .set micromips
+;
+; 64(R1) bit with NO_REORDER and static
+; CHECK-LE64: .abicalls
+; CHECK-LE64: .set noreorder
+;
+; 64(R1) bit with NO_REORDER and PIC
+; CHECK-LE64_PIC: .abicalls
+; CHECK-LE64_PIC: .set noreorder
+;
+; 64R2 bit with NO_REORDER and static
+; CHECK-LE64R2: .abicalls
+; CHECK-LE64R2: .set noreorder
+;
+; 64R2 bit with NO_REORDER and PIC
+; CHECK-LE64R2_PIC: .abicalls
+; CHECK-LE64R2_PIC: .set noreorder
+;
+; 32R2 bit MIPS16 with PIC
+; CHECK-LE32R2-MIPS16: .abicalls
+; CHECK-LE32R2-MIPS16: .set mips16
+
+define i32 @main() nounwind {
+entry:
+  ret i32 0
+}
diff --git a/test/CodeGen/Mips/ex2.ll b/test/CodeGen/Mips/ex2.ll
index c5535e7661a73..6d024c209c262 100644
--- a/test/CodeGen/Mips/ex2.ll
+++ b/test/CodeGen/Mips/ex2.ll
@@ -6,12 +6,11 @@
 define i32 @main() {
 ; 16-LABEL: main:
 ; 16: 	.cfi_startproc
-; 16: 	save	$ra, $s0, $s1, $s2, 40
-; 16:   .cfi_def_cfa_offset 40
-; 16:   .cfi_offset 18, -8
-; 16:   .cfi_offset 17, -12
-; 16: 	.cfi_offset 16, -16
+; 16: 	save	$16, $17, $ra, 32 # 16 bit inst
+; 16:   .cfi_def_cfa_offset 32
 ; 16: 	.cfi_offset 31, -4
+; 16: 	.cfi_offset 17, -8
+; 16:   .cfi_offset 16, -12
 ; 16:   .cfi_endproc
 entry:
   %retval = alloca i32, align 4
diff --git a/test/CodeGen/Mips/f16abs.ll b/test/CodeGen/Mips/f16abs.ll
index 928914f067dd6..0fba9c4fd08a5 100644
--- a/test/CodeGen/Mips/f16abs.ll
+++ b/test/CodeGen/Mips/f16abs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=static
 
 @y = global double -1.450000e+00, align 8
 @x = common global double 0.000000e+00, align 8
diff --git a/test/CodeGen/Mips/fabs.ll b/test/CodeGen/Mips/fabs.ll
index 49d8a7201e8b2..ce1a9a60e7c23 100644
--- a/test/CodeGen/Mips/fabs.ll
+++ b/test/CodeGen/Mips/fabs.ll
@@ -1,21 +1,23 @@
-; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 | FileCheck %s -check-prefix=32
-; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
-; RUN: llc  < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
-; RUN: llc  < %s -mtriple=mips64el-linux-gnu -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
-; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
+; Check that abs.[ds] is selected and does not depend on -enable-no-nans-fp-math
+; They obey the Has2008 and ABS2008 configuration bits which govern the
+; conformance to IEEE 754 (1985) and IEEE 754 (2008). When these bits are not
+; present, they confirm to 1985.
+; In 1985 mode, abs.[ds] are arithmetic (i.e. they raise invalid operation
+; exceptions when given NaN's). In 2008 mode, they are non-arithmetic (i.e.
+; they are copies and don't raise any exceptions).
 
-define float @foo0(float %a) nounwind readnone {
-entry:
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 | FileCheck %s
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32r2 | FileCheck %s
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s
 
-; 32: lui  $[[T0:[0-9]+]], 32767
-; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
-; 32: and  $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
-; 32: mtc1 $[[AND]], $f0
+; RUN: llc  < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 | FileCheck %s
+; RUN: llc  < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 -enable-no-nans-fp-math | FileCheck %s
 
-; 32R2: ins  $[[INS:[0-9]+]], $zero, 31, 1
-; 32R2: mtc1 $[[INS]], $f0
+define float @foo0(float %a) nounwind readnone {
+entry:
 
-; NO-NAN: abs.s
+; CHECK-LABEL: foo0
+; CHECK: abs.s
 
   %call = tail call float @fabsf(float %a) nounwind readnone
   ret float %call
@@ -26,24 +28,8 @@ declare float @fabsf(float) nounwind readnone
 define double @foo1(double %a) nounwind readnone {
 entry:
 
-; 32: lui  $[[T0:[0-9]+]], 32767
-; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
-; 32: and  $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
-; 32: mtc1 $[[AND]], $f1
-
-; 32R2: ins  $[[INS:[0-9]+]], $zero, 31, 1
-; 32R2: mtc1 $[[INS]], $f1
-
-; 64: daddiu  $[[T0:[0-9]+]], $zero, 1
-; 64: dsll    $[[T1:[0-9]+]], ${{[0-9]+}}, 63
-; 64: daddiu  $[[MSK0:[0-9]+]], $[[T1]], -1
-; 64: and     $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
-; 64: dmtc1   $[[AND]], $f0
-
-; 64R2: dins  $[[INS:[0-9]+]], $zero, 63, 1
-; 64R2: dmtc1 $[[INS]], $f0
-
-; NO-NAN: abs.d
+; CHECK-LABEL: foo1:
+; CHECK: abs.d
 
   %call = tail call double @fabs(double %a) nounwind readnone
   ret double %call
diff --git a/test/CodeGen/Mips/fastcc.ll b/test/CodeGen/Mips/fastcc.ll
index 82919e7139bdd..822902c27d2fa 100644
--- a/test/CodeGen/Mips/fastcc.ll
+++ b/test/CodeGen/Mips/fastcc.ll
@@ -1,4 +1,8 @@
 ; RUN: llc  < %s -march=mipsel | FileCheck %s 
+; RUN: llc  < %s -mtriple=mipsel-none-nacl-gnu \
+; RUN:  | FileCheck %s -check-prefix=CHECK-NACL
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 -mattr=+nooddspreg | FileCheck %s -check-prefix=NOODDSPREG
+
 
 @gi0 = external global i32
 @gi1 = external global i32
@@ -77,6 +81,8 @@
 @g15 = external global i32
 @g16 = external global i32
 
+@fa = common global [11 x float] zeroinitializer, align 4
+
 define void @caller0() nounwind {
 entry:
 ; CHECK: caller0
@@ -95,6 +101,11 @@ entry:
 ; CHECK: lw  $5
 ; CHECK: lw  $4
 
+; t6, t7 and t8 are reserved in NaCl and cannot be used for fastcc.
+; CHECK-NACL-NOT: lw  $14
+; CHECK-NACL-NOT: lw  $15
+; CHECK-NACL-NOT: lw  $24
+
   %0 = load i32* @gi0, align 4
   %1 = load i32* @gi1, align 4
   %2 = load i32* @gi2, align 4
@@ -134,6 +145,11 @@ entry:
 ; CHECK: sw  $24
 ; CHECK: sw  $3
 
+; t6, t7 and t8 are reserved in NaCl and cannot be used for fastcc.
+; CHECK-NACL-NOT: sw  $14
+; CHECK-NACL-NOT: sw  $15
+; CHECK-NACL-NOT: sw  $24
+
   store i32 %a0, i32* @g0, align 4
   store i32 %a1, i32* @g1, align 4
   store i32 %a2, i32* @g2, align 4
@@ -251,3 +267,84 @@ entry:
   ret void
 }
 
+define void @caller2() {
+entry:
+
+; NOODDSPREG-LABEL:  caller2
+
+; Check that first 10 arguments are passed in even float registers
+; f0, f2, ... , f18. Check that 11th argument is passed on stack.
+
+; NOODDSPREG-DAG:    lw      $[[R0:[0-9]+]], %got(fa)(${{[0-9]+|gp}})
+; NOODDSPREG-DAG:    lwc1    $f0, 0($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f2, 4($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f4, 8($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f6, 12($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f8, 16($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f10, 20($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f12, 24($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f14, 28($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f16, 32($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f18, 36($[[R0]])
+
+; NOODDSPREG-DAG:    lwc1    $[[F0:f[0-9]*[02468]]], 40($[[R0]])
+; NOODDSPREG-DAG:    swc1    $[[F0]], 0($sp)
+
+  %0 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 0), align 4
+  %1 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 1), align 4
+  %2 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 2), align 4
+  %3 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 3), align 4
+  %4 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 4), align 4
+  %5 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 5), align 4
+  %6 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 6), align 4
+  %7 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 7), align 4
+  %8 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 8), align 4
+  %9 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 9), align 4
+  %10 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 10), align 4
+  tail call fastcc void @callee2(float %0, float %1, float %2, float %3,
+                                 float %4, float %5, float %6, float %7,
+                                 float %8, float %9, float %10)
+  ret void
+}
+
+define fastcc void @callee2(float %a0, float %a1, float %a2, float %a3,
+                            float %a4, float %a5, float %a6, float %a7,
+                            float %a8, float %a9, float %a10) {
+entry:
+
+; NOODDSPREG-LABEL:  callee2
+
+; NOODDSPREG:        addiu   $sp, $sp, -[[OFFSET:[0-9]+]]
+
+; Check that first 10 arguments are received in even float registers
+; f0, f2, ... , f18. Check that 11th argument is received on stack.
+
+; NOODDSPREG-DAG:    lw      $[[R0:[0-9]+]], %got(fa)(${{[0-9]+|gp}})
+; NOODDSPREG-DAG:    swc1    $f0, 0($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f2, 4($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f4, 8($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f6, 12($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f8, 16($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f10, 20($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f12, 24($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f14, 28($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f16, 32($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f18, 36($[[R0]])
+
+; NOODDSPREG-DAG:    lwc1    $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
+; NOODDSPREG-DAG:    swc1    $[[F0]], 40($[[R0]])
+
+  store float %a0, float* getelementptr ([11 x float]* @fa, i32 0, i32 0), align 4
+  store float %a1, float* getelementptr ([11 x float]* @fa, i32 0, i32 1), align 4
+  store float %a2, float* getelementptr ([11 x float]* @fa, i32 0, i32 2), align 4
+  store float %a3, float* getelementptr ([11 x float]* @fa, i32 0, i32 3), align 4
+  store float %a4, float* getelementptr ([11 x float]* @fa, i32 0, i32 4), align 4
+  store float %a5, float* getelementptr ([11 x float]* @fa, i32 0, i32 5), align 4
+  store float %a6, float* getelementptr ([11 x float]* @fa, i32 0, i32 6), align 4
+  store float %a7, float* getelementptr ([11 x float]* @fa, i32 0, i32 7), align 4
+  store float %a8, float* getelementptr ([11 x float]* @fa, i32 0, i32 8), align 4
+  store float %a9, float* getelementptr ([11 x float]* @fa, i32 0, i32 9), align 4
+  store float %a10, float* getelementptr ([11 x float]* @fa, i32 0, i32 10), align 4
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll
new file mode 100644
index 0000000000000..b7759831c5a2b
--- /dev/null
+++ b/test/CodeGen/Mips/fcmp.ll
@@ -0,0 +1,783 @@
+; RUN: llc < %s -march=mipsel   -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=32-C
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=32-C
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=32-CMP
+; RUN: llc < %s -march=mips64el -mcpu=mips4    | FileCheck %s -check-prefix=ALL -check-prefix=64-C
+; RUN: llc < %s -march=mips64el -mcpu=mips64   | FileCheck %s -check-prefix=ALL -check-prefix=64-C
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=64-C
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=64-CMP
+
+define i32 @false_f32(float %a, float %b) nounwind {
+; ALL-LABEL: false_f32:
+; ALL:           addiu $2, $zero, 0
+
+  %1 = fcmp false float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @oeq_f32(float %a, float %b) nounwind {
+; ALL-LABEL: oeq_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.eq.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.eq.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.eq.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp oeq float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ogt_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ogt_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ule.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ule.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.lt.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.lt.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ogt float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @oge_f32(float %a, float %b) nounwind {
+; ALL-LABEL: oge_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ult.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ult.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.le.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.le.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp oge float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @olt_f32(float %a, float %b) nounwind {
+; ALL-LABEL: olt_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.olt.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.olt.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.lt.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.lt.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp olt float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ole_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ole_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ole.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ole.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.le.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.le.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ole float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @one_f32(float %a, float %b) nounwind {
+; ALL-LABEL: one_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ueq.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ueq.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp one float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ord_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ord_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.un.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.un.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.un.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.un.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp ord float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ueq_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ueq_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ueq.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ueq.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ueq float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ugt_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ugt_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ole.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ole.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ult.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ult.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ugt float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @uge_f32(float %a, float %b) nounwind {
+; ALL-LABEL: uge_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.olt.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.olt.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ule.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ule.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp uge float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ult_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ult_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ult.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ult.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ult.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ult.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ult float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ule_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ule_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ule.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ule.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ule.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ule.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ule float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @une_f32(float %a, float %b) nounwind {
+; ALL-LABEL: une_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.eq.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.eq.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.eq.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp une float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @uno_f32(float %a, float %b) nounwind {
+; ALL-LABEL: uno_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.un.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.un.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.un.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.un.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp uno float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @true_f32(float %a, float %b) nounwind {
+; ALL-LABEL: true_f32:
+; ALL:           addiu $2, $zero, 1
+
+  %1 = fcmp true float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @false_f64(double %a, double %b) nounwind {
+; ALL-LABEL: false_f64:
+; ALL:           addiu $2, $zero, 0
+
+  %1 = fcmp false double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @oeq_f64(double %a, double %b) nounwind {
+; ALL-LABEL: oeq_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.eq.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.eq.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.eq.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp oeq double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ogt_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ogt_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ule.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ule.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.lt.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.lt.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ogt double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @oge_f64(double %a, double %b) nounwind {
+; ALL-LABEL: oge_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ult.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ult.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.le.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.le.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp oge double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @olt_f64(double %a, double %b) nounwind {
+; ALL-LABEL: olt_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.olt.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.olt.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.lt.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.lt.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp olt double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ole_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ole_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ole.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ole.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.le.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.le.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ole double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @one_f64(double %a, double %b) nounwind {
+; ALL-LABEL: one_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ueq.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ueq.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp one double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ord_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ord_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.un.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.un.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.un.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.un.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp ord double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ueq_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ueq_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ueq.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ueq.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ueq double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ugt_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ugt_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ole.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ole.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ult.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ult.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ugt double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @uge_f64(double %a, double %b) nounwind {
+; ALL-LABEL: uge_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.olt.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.olt.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ule.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ule.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp uge double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ult_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ult_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ult.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ult.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ult.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ult.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ult double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ule_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ule_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ule.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ule.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ule.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ule.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ule double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @une_f64(double %a, double %b) nounwind {
+; ALL-LABEL: une_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.eq.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.eq.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.eq.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp une double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @uno_f64(double %a, double %b) nounwind {
+; ALL-LABEL: uno_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.un.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.un.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.un.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.un.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp uno double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @true_f64(double %a, double %b) nounwind {
+; ALL-LABEL: true_f64:
+; ALL:           addiu $2, $zero, 1
+
+  %1 = fcmp true double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
diff --git a/test/CodeGen/Mips/fcopysign-f32-f64.ll b/test/CodeGen/Mips/fcopysign-f32-f64.ll
index 9f88d0c956b19..148a780fb930c 100644
--- a/test/CodeGen/Mips/fcopysign-f32-f64.ll
+++ b/test/CodeGen/Mips/fcopysign-f32-f64.ll
@@ -1,3 +1,4 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s -check-prefix=64
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
 
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index 1c57eca3c9eca..3a9d9c73b279e 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -1,5 +1,6 @@
 ; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
 ; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s -check-prefix=64
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
 
@@ -16,7 +17,7 @@ entry:
 
 ; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
 ; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
-; 32R2: mtc1 $[[INS]], $f1
+; 32R2: mthc1 $[[INS]], $f0
 
 ; 64: daddiu $[[T0:[0-9]+]], $zero, 1
 ; 64: dsll   $[[MSK1:[0-9]+]], $[[T0]], 63
diff --git a/test/CodeGen/Mips/fixdfsf.ll b/test/CodeGen/Mips/fixdfsf.ll
index b08eefd712351..4271ac222edb2 100644
--- a/test/CodeGen/Mips/fixdfsf.ll
+++ b/test/CodeGen/Mips/fixdfsf.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic2
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic2
 
 @x = common global double 0.000000e+00, align 8
 @y = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
index 435b419368b31..271631efb40ae 100644
--- a/test/CodeGen/Mips/fmadd1.ll
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -1,11 +1,58 @@
-; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefix=32R2
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=64R2
-; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2NAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2NAN
+; Check that madd.[ds], msub.[ds], nmadd.[ds], and nmsub.[ds] are supported
+; correctly.
+; The spec for nmadd.[ds], and nmsub.[ds] does not state that they obey the
+; the Has2008 and ABS2008 configuration bits which govern the conformance to
+; IEEE 754 (1985) and IEEE 754 (2008). These instructions are therefore only
+; available when -enable-no-nans-fp-math is given.
+
+; RUN: llc < %s -march=mipsel   -mcpu=mips32              -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=32   -check-prefix=32-NONAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2            -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=32R2 -check-prefix=32R2-NONAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6            -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=32R6 -check-prefix=32R6-NONAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64   -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64   -check-prefix=64-NONAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64R2 -check-prefix=64R2-NONAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64R6 -check-prefix=64R6-NONAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32              | FileCheck %s -check-prefix=ALL -check-prefix=32 -check-prefix=32-NAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2            | FileCheck %s -check-prefix=ALL -check-prefix=32R2 -check-prefix=32R2-NAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6            | FileCheck %s -check-prefix=ALL -check-prefix=32R6 -check-prefix=32R6-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64   -mattr=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64   -check-prefix=64-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64R2 -check-prefix=64R2-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -mattr=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64R6 -check-prefix=64R6-NAN
 
 define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
 entry:
-; CHECK: madd.s 
+; ALL-LABEL: FOO0float:
+
+; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        add.s $f0, $[[T1]], $[[T2]]
+
+; 32R2:          mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2:          madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:          add.s $f0, $[[T1]], $[[T2]]
+
+; 32R6-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+
+; 64-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        add.s $f0, $[[T1]], $[[T2]]
+
+; 64R2:          madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:          add.s $f0, $[[T0]], $[[T1]]
+
+; 64R6-DAG:      mul.s $[[T0:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      add.s $[[T1:f[0-9]+]], $[[T0]], $f14
+; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+
   %mul = fmul float %a, %b
   %add = fadd float %mul, %c
   %add1 = fadd float %add, 0.000000e+00
@@ -14,7 +61,39 @@ entry:
 
 define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
 entry:
-; CHECK: msub.s 
+; ALL-LABEL: FOO1float:
+
+; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        add.s $f0, $[[T1]], $[[T2]]
+
+; 32R2:          mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2:          msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:          add.s $f0, $[[T1]], $[[T2]]
+
+; 32R6-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+
+; 64-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        add.s $f0, $[[T1]], $[[T2]]
+
+; 64R2:          msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:          add.s $f0, $[[T0]], $[[T1]]
+
+; 64R6-DAG:      mul.s $[[T0:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      sub.s $[[T1:f[0-9]+]], $[[T0]], $f14
+; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c
   %add = fadd float %sub, 0.000000e+00
@@ -23,10 +102,44 @@ entry:
 
 define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
 entry:
-; 32R2: nmadd.s 
-; 64R2: nmadd.s 
-; 32R2NAN: madd.s 
-; 64R2NAN: madd.s 
+; ALL-LABEL: FOO2float:
+
+; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+
+; 32R2-NONAN:    mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NONAN:    nmadd.s $f0, $[[T0]], $f12, $f14
+
+; 32R2-NAN:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NAN:      madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:      sub.s  $f0, $[[T2]], $[[T1]]
+
+; 32R6-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      sub.s $f0, $[[T2]], $[[T1]]
+
+; 64-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+
+; 64R2-NONAN:    nmadd.s $f0, $f14, $f12, $f13
+
+; 64R2-NAN:      madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:      sub.s  $f0, $[[T1]], $[[T0]]
+
+; 64R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      sub.s $f0, $[[T2]], $[[T1]]
+
   %mul = fmul float %a, %b
   %add = fadd float %mul, %c
   %sub = fsub float 0.000000e+00, %add
@@ -35,10 +148,36 @@ entry:
 
 define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
 entry:
-; 32R2: nmsub.s 
-; 64R2: nmsub.s 
-; 32R2NAN: msub.s 
-; 64R2NAN: msub.s 
+; ALL-LABEL: FOO3float:
+
+; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+
+; 32R2-NONAN:    mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NONAN:    nmsub.s $f0, $[[T0]], $f12, $f14
+
+; 32R2-NAN:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NAN:      msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:      sub.s  $f0, $[[T2]], $[[T1]]
+
+; 64-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+
+; 64R2-NAN:      msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:      sub.s  $f0, $[[T1]], $[[T0]]
+
+; 64R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      sub.s $f0, $[[T2]], $[[T1]]
+
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c
   %sub1 = fsub float 0.000000e+00, %sub
@@ -47,7 +186,40 @@ entry:
 
 define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
 entry:
-; CHECK: madd.d
+; ALL-LABEL: FOO10double:
+
+; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        add.d $f0, $[[T1]], $[[T2]]
+
+; 32R2:          ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2:          madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:          mthc1 $zero, $[[T2]]
+; 32R2:          add.d $f0, $[[T1]], $[[T2]]
+
+; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+
+; 64-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        add.d $f0, $[[T1]], $[[T2]]
+
+; 64R2:          madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:          add.d $f0, $[[T0]], $[[T1]]
+
+; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+
   %mul = fmul double %a, %b
   %add = fadd double %mul, %c
   %add1 = fadd double %add, 0.000000e+00
@@ -56,7 +228,40 @@ entry:
 
 define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
 entry:
-; CHECK: msub.d
+; ALL-LABEL: FOO11double:
+
+; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        add.d $f0, $[[T1]], $[[T2]]
+
+; 32R2:          ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2:          msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:          mthc1 $zero, $[[T2]]
+; 32R2:          add.d $f0, $[[T1]], $[[T2]]
+
+; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+
+; 64-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        add.d $f0, $[[T1]], $[[T2]]
+
+; 64R2:          msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:          add.d $f0, $[[T0]], $[[T1]]
+
+; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+
   %mul = fmul double %a, %b
   %sub = fsub double %mul, %c
   %add = fadd double %sub, 0.000000e+00
@@ -65,10 +270,45 @@ entry:
 
 define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
 entry:
-; 32R2: nmadd.d 
-; 64R2: nmadd.d 
-; 32R2NAN: madd.d 
-; 64R2NAN: madd.d 
+; ALL-LABEL: FOO12double:
+
+; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+
+; 32R2-NONAN:    ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NONAN:    nmadd.d $f0, $[[T0]], $f12, $f14
+
+; 32R2-NAN:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NAN:      madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:      mthc1 $zero, $[[T2]]
+; 32R2-NAN:      sub.d $f0, $[[T2]], $[[T1]]
+
+; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+
+; 64-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+
+; 64R2-NONAN:    nmadd.d $f0, $f14, $f12, $f13
+
+; 64R2-NAN:      madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:      sub.d $f0, $[[T1]], $[[T0]]
+
+; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+
   %mul = fmul double %a, %b
   %add = fadd double %mul, %c
   %sub = fsub double 0.000000e+00, %add
@@ -77,10 +317,45 @@ entry:
 
 define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
 entry:
-; 32R2: nmsub.d 
-; 64R2: nmsub.d 
-; 32R2NAN: msub.d 
-; 64R2NAN: msub.d 
+; ALL-LABEL: FOO13double:
+
+; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+
+; 32R2-NONAN:    ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NONAN:    nmsub.d $f0, $[[T0]], $f12, $f14
+
+; 32R2-NAN:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NAN:      msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:      mthc1 $zero, $[[T2]]
+; 32R2-NAN:      sub.d $f0, $[[T2]], $[[T1]]
+
+; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+
+; 64-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+
+; 64R2-NONAN:    nmsub.d $f0, $f14, $f12, $f13
+
+; 64R2-NAN:      msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:      sub.d $f0, $[[T1]], $[[T0]]
+
+; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+
   %mul = fmul double %a, %b
   %sub = fsub double %mul, %c
   %sub1 = fsub double 0.000000e+00, %sub
diff --git a/test/CodeGen/Mips/fneg.ll b/test/CodeGen/Mips/fneg.ll
index b322abdaa23c5..4fb80fdb4f32f 100644
--- a/test/CodeGen/Mips/fneg.ll
+++ b/test/CodeGen/Mips/fneg.ll
@@ -1,17 +1,30 @@
-; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s 
+; Check that abs.[ds] is selected and does not depend on -enable-no-nans-fp-math
+; They obey the Has2008 and ABS2008 configuration bits which govern the
+; conformance to IEEE 754 (1985) and IEEE 754 (2008). When these bits are not
+; present, they confirm to 1985.
+; In 1985 mode, abs.[ds] are arithmetic (i.e. they raise invalid operation
+; exceptions when given NaN's). In 2008 mode, they are non-arithmetic (i.e.
+; they are copies and don't raise any exceptions).
 
-define float @foo0(i32 %a, float %d) nounwind readnone {
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 | FileCheck %s
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32r2 | FileCheck %s
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s
+
+; RUN: llc  < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 | FileCheck %s
+; RUN: llc  < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 -enable-no-nans-fp-math | FileCheck %s
+
+define float @foo0(float %d) nounwind readnone {
 entry:
-; CHECK-NOT: neg.s
+; CHECK-LABEL: foo0:
+; CHECK: neg.s
   %sub = fsub float -0.000000e+00, %d
   ret float %sub
 }
 
-define double @foo1(i32 %a, double %d) nounwind readnone {
+define double @foo1(double %d) nounwind readnone {
 entry:
-; CHECK:     foo1
-; CHECK-NOT: neg.d
-; CHECK:     jr
+; CHECK-LABEL: foo1:
+; CHECK: neg.d
   %sub = fsub double -0.000000e+00, %d
   ret double %sub
 }
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
index 1c4a3fdb4a420..787e131f6ec54 100644
--- a/test/CodeGen/Mips/fp-indexed-ls.ll
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -1,4 +1,13 @@
-; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
+; RUN: llc -march=mipsel   -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32R1
+; RUN: llc -march=mipsel   -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32R2
+; RUN: llc -march=mipsel   -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32R6
+; RUN: llc -march=mips64el -mcpu=mips4    -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
+; RUN: llc -march=mips64el -mcpu=mips64   -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
+; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64R6
+
+; Check that [ls][dwu]xc1 are not emitted for nacl.
+; RUN: llc -mtriple=mipsel-none-nacl-gnu -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=CHECK-NACL
 
 %struct.S = type <{ [4 x float] }>
 %struct.S2 = type <{ [4 x double] }>
@@ -12,7 +21,30 @@
 
 define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
 entry:
-; CHECK: lwxc1
+; ALL-LABEL: foo0:
+
+; MIPS32R1:      sll $[[T1:[0-9]+]], $5, 2
+; MIPS32R1:      addu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS32R1:      lwc1 $f0, 0($[[T3]])
+
+; MIPS32R2:      sll $[[T1:[0-9]+]], $5, 2
+; MIPS32R2:      lwxc1 $f0, $[[T1]]($4)
+
+; MIPS32R6:      sll $[[T1:[0-9]+]], $5, 2
+; MIPS32R6:      addu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS32R6:      lwc1 $f0, 0($[[T3]])
+
+; MIPS4:         sll $[[T0:[0-9]+]], $5, 0
+; MIPS4:         dsll $[[T1:[0-9]+]], $[[T0]], 2
+; MIPS4:         lwxc1 $f0, $[[T1]]($4)
+
+; MIPS64R6:      sll $[[T0:[0-9]+]], $5, 0
+; MIPS64R6:      dsll $[[T1:[0-9]+]], $[[T0]], 2
+; MIPS64R6:      daddu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS64R6:      lwc1 $f0, 0($[[T3]])
+
+; CHECK-NACL-NOT: lwxc1
+
   %arrayidx = getelementptr inbounds float* %b, i32 %o
   %0 = load float* %arrayidx, align 4
   ret float %0
@@ -20,7 +52,30 @@ entry:
 
 define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
 entry:
-; CHECK: ldxc1
+; ALL-LABEL: foo1:
+
+; MIPS32R1:      sll $[[T1:[0-9]+]], $5, 3
+; MIPS32R1:      addu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS32R1:      ldc1 $f0, 0($[[T3]])
+
+; MIPS32R2:      sll $[[T1:[0-9]+]], $5, 3
+; MIPS32R2:      ldxc1 $f0, $[[T1]]($4)
+
+; MIPS32R6:      sll $[[T1:[0-9]+]], $5, 3
+; MIPS32R6:      addu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS32R6:      ldc1 $f0, 0($[[T3]])
+
+; MIPS4:         sll $[[T0:[0-9]+]], $5, 0
+; MIPS4:         dsll $[[T1:[0-9]+]], $[[T0]], 3
+; MIPS4:         ldxc1 $f0, $[[T1]]($4)
+
+; MIPS64R6:      sll $[[T0:[0-9]+]], $5, 0
+; MIPS64R6:      dsll $[[T1:[0-9]+]], $[[T0]], 3
+; MIPS64R6:      daddu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS64R6:      ldc1 $f0, 0($[[T3]])
+
+; CHECK-NACL-NOT: ldxc1
+
   %arrayidx = getelementptr inbounds double* %b, i32 %o
   %0 = load double* %arrayidx, align 8
   ret double %0
@@ -28,7 +83,23 @@ entry:
 
 define float @foo2(i32 %b, i32 %c) nounwind readonly {
 entry:
-; CHECK-NOT: luxc1
+; ALL-LABEL: foo2:
+
+; luxc1 did not exist in MIPS32r1
+; MIPS32R1-NOT:  luxc1
+
+; luxc1 is a misnomer since it aligns the given pointer downwards and performs
+; an aligned load. We mustn't use it to handle unaligned loads.
+; MIPS32R2-NOT:  luxc1
+
+; luxc1 was removed in MIPS32r6
+; MIPS32R6-NOT:  luxc1
+
+; MIPS4-NOT:     luxc1
+
+; luxc1 was removed in MIPS64r6
+; MIPS64R6-NOT:  luxc1
+
   %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
   %0 = load float* %arrayidx1, align 1
   ret float %0
@@ -36,7 +107,28 @@ entry:
 
 define void @foo3(float* nocapture %b, i32 %o) nounwind {
 entry:
-; CHECK: swxc1
+; ALL-LABEL: foo3:
+
+; MIPS32R1-DAG:  lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R1-DAG:  addu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS32R1-DAG:  swc1 $[[T0]], 0($[[T1]])
+
+; MIPS32R2:      lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R2:      swxc1 $[[T0]], ${{[0-9]+}}($4)
+
+; MIPS32R6-DAG:  lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R6-DAG:  addu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS32R6-DAG:  swc1 $[[T0]], 0($[[T1]])
+
+; MIPS4:         lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS4:         swxc1 $[[T0]], ${{[0-9]+}}($4)
+
+; MIPS64R6-DAG:  lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS64R6-DAG:  daddu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS64R6-DAG:  swc1 $[[T0]], 0($[[T1]])
+
+; CHECK-NACL-NOT: swxc1
+
   %0 = load float* @gf, align 4
   %arrayidx = getelementptr inbounds float* %b, i32 %o
   store float %0, float* %arrayidx, align 4
@@ -45,7 +137,28 @@ entry:
 
 define void @foo4(double* nocapture %b, i32 %o) nounwind {
 entry:
-; CHECK: sdxc1
+; ALL-LABEL: foo4:
+
+; MIPS32R1-DAG:  ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R1-DAG:  addu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS32R1-DAG:  sdc1 $[[T0]], 0($[[T1]])
+
+; MIPS32R2:      ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R2:      sdxc1 $[[T0]], ${{[0-9]+}}($4)
+
+; MIPS32R6-DAG:  ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R6-DAG:  addu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS32R6-DAG:  sdc1 $[[T0]], 0($[[T1]])
+
+; MIPS4:         ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS4:         sdxc1 $[[T0]], ${{[0-9]+}}($4)
+
+; MIPS64R6-DAG:  ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS64R6-DAG:  daddu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS64R6-DAG:  sdc1 $[[T0]], 0($[[T1]])
+
+; CHECK-NACL-NOT: sdxc1
+
   %0 = load double* @gd, align 8
   %arrayidx = getelementptr inbounds double* %b, i32 %o
   store double %0, double* %arrayidx, align 8
@@ -54,7 +167,18 @@ entry:
 
 define void @foo5(i32 %b, i32 %c) nounwind {
 entry:
-; CHECK-NOT: suxc1
+; ALL-LABEL: foo5:
+
+; MIPS32R1-NOT:  suxc1
+
+; MIPS32R2-NOT:  suxc1
+
+; MIPS32R6-NOT:  suxc1
+
+; MIPS4-NOT:     suxc1
+
+; MIPS64R6-NOT:  suxc1
+
   %0 = load float* @gf, align 4
   %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
   store float %0, float* %arrayidx1, align 1
@@ -63,8 +187,18 @@ entry:
 
 define double @foo6(i32 %b, i32 %c) nounwind readonly {
 entry:
-; CHECK: foo6
-; CHECK-NOT: luxc1
+; ALL-LABEL: foo6:
+
+; MIPS32R1-NOT:  luxc1
+
+; MIPS32R2-NOT:  luxc1
+
+; MIPS32R6-NOT:  luxc1
+
+; MIPS4-NOT:     luxc1
+
+; MIPS64R6-NOT:  luxc1
+
   %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
   %0 = load double* %arrayidx1, align 1
   ret double %0
@@ -72,8 +206,18 @@ entry:
 
 define void @foo7(i32 %b, i32 %c) nounwind {
 entry:
-; CHECK: foo7
-; CHECK-NOT: suxc1
+; ALL-LABEL: foo7:
+
+; MIPS32R1-NOT:  suxc1
+
+; MIPS32R2-NOT:  suxc1
+
+; MIPS32R6-NOT:  suxc1
+
+; MIPS4-NOT:     suxc1
+
+; MIPS64R6-NOT:  suxc1
+
   %0 = load double* @gd, align 8
   %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
   store double %0, double* %arrayidx1, align 1
@@ -82,16 +226,36 @@ entry:
 
 define float @foo8() nounwind readonly {
 entry:
-; CHECK: foo8
-; CHECK-NOT: luxc1
+; ALL-LABEL: foo8:
+
+; MIPS32R1-NOT:  luxc1
+
+; MIPS32R2-NOT:  luxc1
+
+; MIPS32R6-NOT:  luxc1
+
+; MIPS4-NOT:     luxc1
+
+; MIPS64R6-NOT:  luxc1
+
   %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
   ret float %0
 }
 
 define void @foo9(float %f) nounwind {
 entry:
-; CHECK: foo9
-; CHECK-NOT: suxc1
+; ALL-LABEL: foo9:
+
+; MIPS32R1-NOT:  suxc1
+
+; MIPS32R2-NOT:  suxc1
+
+; MIPS32R6-NOT:  suxc1
+
+; MIPS4-NOT:     suxc1
+
+; MIPS64R6-NOT:  suxc1
+
   store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
   ret void
 }
diff --git a/test/CodeGen/Mips/fp16instrinsmc.ll b/test/CodeGen/Mips/fp16instrinsmc.ll
index bb43d2711c268..7ced36c016f7d 100644
--- a/test/CodeGen/Mips/fp16instrinsmc.ll
+++ b/test/CodeGen/Mips/fp16instrinsmc.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=1010111 -mips-os16 < %s | FileCheck %s -check-prefix=fmask 
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=1010111 -mips-os16 < %s | FileCheck %s -check-prefix=fmask
 
 @x = global float 1.500000e+00, align 4
 @xn = global float -1.900000e+01, align 4
diff --git a/test/CodeGen/Mips/fp16mix.ll b/test/CodeGen/Mips/fp16mix.ll
index 8d85099ba9f20..a94f838fb675c 100644
--- a/test/CodeGen/Mips/fp16mix.ll
+++ b/test/CodeGen/Mips/fp16mix.ll
@@ -1,8 +1,8 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=10 -mips-os16 < %s | FileCheck %s -check-prefix=fmask1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=10 -mips-os16 < %s | FileCheck %s -check-prefix=fmask1
 
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=01 -mips-os16 < %s | FileCheck %s -check-prefix=fmask2 
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=01 -mips-os16 < %s | FileCheck %s -check-prefix=fmask2
 
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=10. -mips-os16 < %s | FileCheck %s -check-prefix=fmask1nr
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=10. -mips-os16 < %s | FileCheck %s -check-prefix=fmask1nr
 
 ; Function Attrs: nounwind optsize readnone
 define void @foo1()  {
@@ -17,7 +17,7 @@ entry:
 ; fmask1: .set	reorder
 ; fmask1: .end	foo1
 ; fmask2: .ent	foo1
-; fmask2: save	{{.*}}
+; fmask2: jrc $ra
 ; fmask2: .end	foo1
 ; fmask1nr: .ent foo1
 ; fmask1nr: .set	noreorder
@@ -42,10 +42,10 @@ entry:
 ; fmask2: .set	reorder
 ; fmask2: .end	foo2
 ; fmask1: .ent	foo2
-; fmask1: save	{{.*}}
+; fmask1: jrc $ra
 ; fmask1: .end	foo2
 ; fmask1nr: .ent	foo2
-; fmask1nr: save	{{.*}}
+; fmask1nr: jrc $ra
 ; fmask1nr: .end	foo2
 }
 
@@ -62,10 +62,10 @@ entry:
 ; fmask1: .set	reorder
 ; fmask1: .end	foo3
 ; fmask2:  .ent	foo3
-; fmask2:  save	{{.*}}
+; fmask2:  jrc $ra
 ; fmask2:  .end	foo3
 ; fmask1r:  .ent	foo3
-; fmask1r:  save	{{.*}}
+; fmask1r:  jrc $ra
 ; fmask1r:  .end	foo3
 }
 
@@ -82,10 +82,10 @@ entry:
 ; fmask2: .set	reorder
 ; fmask2: .end	foo4
 ; fmask1: .ent	foo4
-; fmask1: save	{{.*}}
+; fmask1: jrc $ra
 ; fmask1: .end	foo4
 ; fmask1nr: .ent	foo4
-; fmask1nr: save	{{.*}}
+; fmask1nr: jrc $ra
 ; fmask1nr: .end	foo4
 }
 
diff --git a/test/CodeGen/Mips/fp16static.ll b/test/CodeGen/Mips/fp16static.ll
index 240ec75a36b64..beb063db15cad 100644
--- a/test/CodeGen/Mips/fp16static.ll
+++ b/test/CodeGen/Mips/fp16static.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
 
 @x = common global float 0.000000e+00, align 4
 
diff --git a/test/CodeGen/Mips/fp64a.ll b/test/CodeGen/Mips/fp64a.ll
new file mode 100644
index 0000000000000..5c2c87373a322
--- /dev/null
+++ b/test/CodeGen/Mips/fp64a.ll
@@ -0,0 +1,197 @@
+; Test that the FP64A ABI performs double precision moves via a spill/reload.
+; The requirement is really that odd-numbered double precision registers do not
+; use mfc1/mtc1 to move the bottom 32-bits (because the hardware will redirect
+; this to the top 32-bits of the even register) but we have to make the decision
+; before register allocation so we do this for all double-precision values.
+
+; We don't test MIPS32r1 since support for 64-bit coprocessors (such as a 64-bit
+; FPU) on a 32-bit architecture was added in MIPS32r2.
+; FIXME: We currently don't test that attempting to use FP64 on MIPS32r1 is an
+;        error either. This is because a large number of CodeGen tests are
+;        incorrectly using this case. We should fix those test cases then add
+;        this check here.
+
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-BE
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-BE
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-LE
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-LE
+
+; RUN: llc -march=mips64 -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A
+; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A
+; RUN: not llc -march=mips64el -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A
+
+; 64-FP64A: LLVM ERROR: -mattr=+nooddspreg requires the O32 ABI.
+
+declare double @dbl();
+
+define double @call1(double %d, ...) {
+  ret double %d
+
+; ALL-LABEL:            call1:
+
+; 32R2-NO-FP64A-LE-NOT:     addiu   $sp, $sp
+; 32R2-NO-FP64A-LE:         mtc1    $4, $f0
+; 32R2-NO-FP64A-LE:         mthc1   $5, $f0
+
+; 32R2-NO-FP64A-BE-NOT:     addiu   $sp, $sp
+; 32R2-NO-FP64A-BE:         mtc1    $5, $f0
+; 32R2-NO-FP64A-BE:         mthc1   $4, $f0
+
+; 32R2-FP64A-LE:            addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:            sw      $4, 0($sp)
+; 32R2-FP64A-LE:            sw      $5, 4($sp)
+; 32R2-FP64A-LE:            ldc1    $f0, 0($sp)
+
+; 32R2-FP64A-BE:            addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:            sw      $5, 0($sp)
+; 32R2-FP64A-BE:            sw      $4, 4($sp)
+; 32R2-FP64A-BE:            ldc1    $f0, 0($sp)
+
+; 64-NO-FP64A:              daddiu  $sp, $sp, -64
+; 64-NO-FP64A:              mov.d   $f0, $f12
+}
+
+define double @call2(i32 %i, double %d) {
+  ret double %d
+
+; ALL-LABEL:        call2:
+
+; 32R2-NO-FP64A-LE:     mtc1    $6, $f0
+; 32R2-NO-FP64A-LE:     mthc1   $7, $f0
+
+; 32R2-NO-FP64A-BE:     mtc1    $7, $f0
+; 32R2-NO-FP64A-BE:     mthc1   $6, $f0
+
+; 32R2-FP64A-LE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:        sw      $6, 0($sp)
+; 32R2-FP64A-LE:        sw      $7, 4($sp)
+; 32R2-FP64A-LE:        ldc1    $f0, 0($sp)
+
+; 32R2-FP64A-BE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:        sw      $7, 0($sp)
+; 32R2-FP64A-BE:        sw      $6, 4($sp)
+; 32R2-FP64A-BE:        ldc1    $f0, 0($sp)
+
+; 64-NO-FP64A-NOT:      daddiu  $sp, $sp
+; 64-NO-FP64A:          mov.d   $f0, $f13
+}
+
+define double @call3(float %f1, float %f2, double %d) {
+  ret double %d
+
+; ALL-LABEL:        call3:
+
+; 32R2-NO-FP64A-LE:     mtc1    $6, $f0
+; 32R2-NO-FP64A-LE:     mthc1   $7, $f0
+
+; 32R2-NO-FP64A-BE:     mtc1    $7, $f0
+; 32R2-NO-FP64A-BE:     mthc1   $6, $f0
+
+; 32R2-FP64A-LE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:        sw      $6, 0($sp)
+; 32R2-FP64A-LE:        sw      $7, 4($sp)
+; 32R2-FP64A-LE:        ldc1    $f0, 0($sp)
+
+; 32R2-FP64A-BE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:        sw      $7, 0($sp)
+; 32R2-FP64A-BE:        sw      $6, 4($sp)
+; 32R2-FP64A-BE:        ldc1    $f0, 0($sp)
+
+; 64-NO-FP64A-NOT:      daddiu  $sp, $sp
+; 64-NO-FP64A:          mov.d   $f0, $f14
+}
+
+define double @call4(float %f, double %d, ...) {
+  ret double %d
+
+; ALL-LABEL:        call4:
+
+; 32R2-NO-FP64A-LE:     mtc1    $6, $f0
+; 32R2-NO-FP64A-LE:     mthc1   $7, $f0
+
+; 32R2-NO-FP64A-BE:     mtc1    $7, $f0
+; 32R2-NO-FP64A-BE:     mthc1   $6, $f0
+
+; 32R2-FP64A-LE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:        sw      $6, 0($sp)
+; 32R2-FP64A-LE:        sw      $7, 4($sp)
+; 32R2-FP64A-LE:        ldc1    $f0, 0($sp)
+
+; 32R2-FP64A-BE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:        sw      $7, 0($sp)
+; 32R2-FP64A-BE:        sw      $6, 4($sp)
+; 32R2-FP64A-BE:        ldc1    $f0, 0($sp)
+
+; 64-NO-FP64A:          daddiu  $sp, $sp, -48
+; 64-NO-FP64A:          mov.d   $f0, $f13
+}
+
+define double @call5(double %a, double %b, ...) {
+  %1 = fsub double %a, %b
+  ret double %1
+
+; ALL-LABEL:            call5:
+
+; 32R2-NO-FP64A-LE-DAG:     mtc1    $4, $[[T0:f[0-9]+]]
+; 32R2-NO-FP64A-LE-DAG:     mthc1   $5, $[[T0:f[0-9]+]]
+; 32R2-NO-FP64A-LE-DAG:     mtc1    $6, $[[T1:f[0-9]+]]
+; 32R2-NO-FP64A-LE-DAG:     mthc1   $7, $[[T1:f[0-9]+]]
+; 32R2-NO-FP64A-LE:         sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-NO-FP64A-BE-DAG:     mtc1    $5, $[[T0:f[0-9]+]]
+; 32R2-NO-FP64A-BE-DAG:     mthc1   $4, $[[T0:f[0-9]+]]
+; 32R2-NO-FP64A-BE-DAG:     mtc1    $7, $[[T1:f[0-9]+]]
+; 32R2-NO-FP64A-BE-DAG:     mthc1   $6, $[[T1:f[0-9]+]]
+; 32R2-NO-FP64A-BE:         sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-FP64A-LE:            addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:            sw      $6, 0($sp)
+; 32R2-FP64A-LE:            sw      $7, 4($sp)
+; 32R2-FP64A-LE:            ldc1    $[[T1:f[0-9]+]], 0($sp)
+; 32R2-FP64A-LE:            sw      $4, 0($sp)
+; 32R2-FP64A-LE:            sw      $5, 4($sp)
+; 32R2-FP64A-LE:            ldc1    $[[T0:f[0-9]+]], 0($sp)
+; 32R2-FP64A-LE:            sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-FP64A-BE:            addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:            sw      $7, 0($sp)
+; 32R2-FP64A-BE:            sw      $6, 4($sp)
+; 32R2-FP64A-BE:            ldc1    $[[T1:f[0-9]+]], 0($sp)
+; 32R2-FP64A-BE:            sw      $5, 0($sp)
+; 32R2-FP64A-BE:            sw      $4, 4($sp)
+; 32R2-FP64A-BE:            ldc1    $[[T0:f[0-9]+]], 0($sp)
+; 32R2-FP64A-BE:            sub.d   $f0, $[[T0]], $[[T1]]
+
+; 64-NO-FP64A:              sub.d   $f0, $f12, $f13
+}
+
+define double @move_from(double %d) {
+  %1 = call double @dbl()
+  %2 = call double @call2(i32 0, double %1)
+  ret double %2
+
+; ALL-LABEL:        move_from:
+
+; 32R2-NO-FP64A-LE-DAG: mfc1    $6, $f0
+; 32R2-NO-FP64A-LE-DAG: mfhc1   $7, $f0
+
+; 32R2-NO-FP64A-BE-DAG: mfc1    $7, $f0
+; 32R2-NO-FP64A-BE-DAG: mfhc1   $6, $f0
+
+; 32R2-FP64A-LE:        addiu   $sp, $sp, -32
+; 32R2-FP64A-LE:        sdc1    $f0, 16($sp)
+; 32R2-FP64A-LE:        lw      $6, 16($sp)
+; FIXME: This store is redundant
+; 32R2-FP64A-LE:        sdc1    $f0, 16($sp)
+; 32R2-FP64A-LE:        lw      $7, 20($sp)
+
+; 32R2-FP64A-BE:        addiu   $sp, $sp, -32
+; 32R2-FP64A-BE:        sdc1    $f0, 16($sp)
+; 32R2-FP64A-BE:        lw      $6, 20($sp)
+; FIXME: This store is redundant
+; 32R2-FP64A-BE:        sdc1    $f0, 16($sp)
+; 32R2-FP64A-BE:        lw      $7, 16($sp)
+
+; 64-NO-FP64A:          mov.d   $f13, $f0
+}
diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll
index a136557cc4a36..311b83015a565 100644
--- a/test/CodeGen/Mips/fpbr.ll
+++ b/test/CodeGen/Mips/fpbr.ll
@@ -1,9 +1,25 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s
+; RUN: llc < %s -march=mipsel -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=FCC -check-prefix=32-FCC
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=FCC -check-prefix=32-FCC
+; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=GPR -check-prefix=32-GPR
+; RUN: llc < %s -march=mips64el -mcpu=mips64   | FileCheck %s -check-prefix=ALL -check-prefix=FCC -check-prefix=64-FCC
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=FCC -check-prefix=64-FCC
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=GPR -check-prefix=64-GPR
 
 define void @func0(float %f2, float %f3) nounwind {
 entry:
-; CHECK: c.eq.s
-; CHECK: bc1f
+; ALL-LABEL: func0:
+
+; 32-FCC:        c.eq.s $f12, $f14
+; 64-FCC:        c.eq.s $f12, $f13
+; FCC:           bc1f   $BB0_2
+
+; 32-GPR:        cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $f14
+; 64-GPR:        cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $f13
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; FIXME: We ought to be able to transform not+bnez -> beqz
+; GPR:           not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez     $[[GPRCC]], $BB0_2
+
   %cmp = fcmp oeq float %f2, %f3
   br i1 %cmp, label %if.then, label %if.else
 
@@ -25,8 +41,18 @@ declare void @g1(...)
 
 define void @func1(float %f2, float %f3) nounwind {
 entry:
-; CHECK: c.olt.s
-; CHECK: bc1f
+; ALL-LABEL: func1:
+
+; 32-FCC:        c.olt.s $f12, $f14
+; 64-FCC:        c.olt.s $f12, $f13
+; FCC:           bc1f    $BB1_2
+
+; 32-GPR:        cmp.ule.s $[[FGRCC:f[0-9]+]], $f14, $f12
+; 64-GPR:        cmp.ule.s $[[FGRCC:f[0-9]+]], $f13, $f12
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez     $[[GPRCC]], $BB1_2
+
   %cmp = fcmp olt float %f2, %f3
   br i1 %cmp, label %if.then, label %if.else
 
@@ -44,8 +70,18 @@ if.end:                                           ; preds = %if.else, %if.then
 
 define void @func2(float %f2, float %f3) nounwind {
 entry:
-; CHECK: c.ole.s
-; CHECK: bc1t
+; ALL-LABEL: func2:
+
+; 32-FCC:        c.ole.s $f12, $f14
+; 64-FCC:        c.ole.s $f12, $f13
+; FCC:           bc1t    $BB2_2
+
+; 32-GPR:        cmp.ult.s $[[FGRCC:f[0-9]+]], $f14, $f12
+; 64-GPR:        cmp.ult.s $[[FGRCC:f[0-9]+]], $f13, $f12
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           beqz     $[[GPRCC]], $BB2_2
+
   %cmp = fcmp ugt float %f2, %f3
   br i1 %cmp, label %if.else, label %if.then
 
@@ -63,8 +99,19 @@ if.end:                                           ; preds = %if.else, %if.then
 
 define void @func3(double %f2, double %f3) nounwind {
 entry:
-; CHECK: c.eq.d
-; CHECK: bc1f
+; ALL-LABEL: func3:
+
+; 32-FCC:        c.eq.d $f12, $f14
+; 64-FCC:        c.eq.d $f12, $f13
+; FCC:           bc1f $BB3_2
+
+; 32-GPR:        cmp.eq.d $[[FGRCC:f[0-9]+]], $f12, $f14
+; 64-GPR:        cmp.eq.d $[[FGRCC:f[0-9]+]], $f12, $f13
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; FIXME: We ought to be able to transform not+bnez -> beqz
+; GPR:           not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez     $[[GPRCC]], $BB3_2
+
   %cmp = fcmp oeq double %f2, %f3
   br i1 %cmp, label %if.then, label %if.else
 
@@ -82,8 +129,18 @@ if.end:                                           ; preds = %if.else, %if.then
 
 define void @func4(double %f2, double %f3) nounwind {
 entry:
-; CHECK: c.olt.d
-; CHECK: bc1f
+; ALL-LABEL: func4:
+
+; 32-FCC:        c.olt.d $f12, $f14
+; 64-FCC:        c.olt.d $f12, $f13
+; FCC:           bc1f $BB4_2
+
+; 32-GPR:        cmp.ule.d $[[FGRCC:f[0-9]+]], $f14, $f12
+; 64-GPR:        cmp.ule.d $[[FGRCC:f[0-9]+]], $f13, $f12
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez     $[[GPRCC]], $BB4_2
+
   %cmp = fcmp olt double %f2, %f3
   br i1 %cmp, label %if.then, label %if.else
 
@@ -101,8 +158,18 @@ if.end:                                           ; preds = %if.else, %if.then
 
 define void @func5(double %f2, double %f3) nounwind {
 entry:
-; CHECK: c.ole.d
-; CHECK: bc1t
+; ALL-LABEL: func5:
+
+; 32-FCC:        c.ole.d $f12, $f14
+; 64-FCC:        c.ole.d $f12, $f13
+; FCC:           bc1t $BB5_2
+
+; 32-GPR:        cmp.ult.d $[[FGRCC:f[0-9]+]], $f14, $f12
+; 64-GPR:        cmp.ult.d $[[FGRCC:f[0-9]+]], $f13, $f12
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           beqz     $[[GPRCC]], $BB5_2
+
   %cmp = fcmp ugt double %f2, %f3
   br i1 %cmp, label %if.else, label %if.then
 
diff --git a/test/CodeGen/Mips/fpneeded.ll b/test/CodeGen/Mips/fpneeded.ll
index dcdebb92e40e2..fdd8e8f707ef4 100644
--- a/test/CodeGen/Mips/fpneeded.ll
+++ b/test/CodeGen/Mips/fpneeded.ll
@@ -10,7 +10,7 @@ entry:
   ret float 1.000000e+00
 }
 
-; 32: 	.set	nomips16                  # @fv
+; 32: 	.set	nomips16
 ; 32: 	.ent	fv
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -26,7 +26,7 @@ entry:
   ret double 2.000000e+00
 }
 
-; 32: 	.set	nomips16                  # @dv
+; 32: 	.set	nomips16
 ; 32: 	.ent	dv
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -44,7 +44,7 @@ entry:
   ret void
 }
 
-; 32: 	.set	nomips16                  # @vf
+; 32: 	.set	nomips16
 ; 32: 	.ent	vf
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -62,7 +62,7 @@ entry:
   ret void
 }
 
-; 32: 	.set	nomips16                  # @vd
+; 32: 	.set	nomips16
 ; 32: 	.ent	vd
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -83,7 +83,7 @@ entry:
   ret void
 }
 
-; 32: 	.set	nomips16                  # @foo1
+; 32: 	.set	nomips16
 ; 32: 	.ent	foo1
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -102,7 +102,7 @@ entry:
 }
 
 
-; 32: 	.set	nomips16                  # @foo2
+; 32: 	.set	nomips16
 ; 32: 	.ent	foo2
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -120,7 +120,7 @@ entry:
   ret void
 }
 
-; 32: 	.set	nomips16                  # @foo3
+; 32: 	.set	nomips16
 ; 32: 	.ent	foo3
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -138,7 +138,7 @@ entry:
   ret void
 }
 
-; 32: 	.set	mips16                  # @vv
+; 32: 	.set	mips16
 ; 32: 	.ent	vv
 
 ; 32:	save	{{.+}}
diff --git a/test/CodeGen/Mips/fpnotneeded.ll b/test/CodeGen/Mips/fpnotneeded.ll
index b4fab6414223e..e12d7baacdbb0 100644
--- a/test/CodeGen/Mips/fpnotneeded.ll
+++ b/test/CodeGen/Mips/fpnotneeded.ll
@@ -1,4 +1,6 @@
-; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-os16  | FileCheck %s -check-prefix=32
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-os16  | FileCheck %s -check-prefix=32
+
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -relocation-model=static -O3 -mips16-constant-islands < %s -mips-os16  | FileCheck %s -check-prefix=cisle
 
 @i = global i32 1, align 4
 @f = global float 1.000000e+00, align 4
@@ -8,7 +10,7 @@ entry:
   ret void
 }
 
-; 32: 	.set	mips16                  # @vv
+; 32: 	.set	mips16
 ; 32: 	.ent	vv
 
 ; 32:	save	{{.+}}
@@ -21,7 +23,7 @@ entry:
   ret i32 %0
 }
 
-; 32: 	.set	mips16                  # @iv
+; 32: 	.set	mips16
 ; 32: 	.ent	iv
 
 ; 32:	save	{{.+}}
@@ -37,7 +39,7 @@ entry:
   ret void
 }
 
-; 32: 	.set	mips16                  # @vif
+; 32: 	.set	mips16
 ; 32: 	.ent	vif
 
 ; 32:	save	{{.+}}
@@ -50,13 +52,15 @@ entry:
   ret void
 }
 
-; 32: 	.set	mips16                  # @foo
+; 32: 	.set	mips16
 ; 32: 	.ent	foo
 
 ; 32:	save	{{.+}}
 ; 32:	restore	{{.+}} 
 ; 32:	.end	foo
 
+; cisle:	.end	foo
+
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 
@@ -65,7 +69,7 @@ entry:
   ret float 1.000000e+00
 }
 
-; 32: 	.set	nomips16                  # @fv
+; 32: 	.set	nomips16
 ; 32: 	.ent	fv
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
diff --git a/test/CodeGen/Mips/fptr2.ll b/test/CodeGen/Mips/fptr2.ll
index 77028dbde9aaf..c8b5e0d1771ee 100644
--- a/test/CodeGen/Mips/fptr2.ll
+++ b/test/CodeGen/Mips/fptr2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=static16
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static  < %s | FileCheck %s -check-prefix=static16
 
 ; Function Attrs: nounwind
 define double @my_mul(double %a, double %b) #0 {
diff --git a/test/CodeGen/Mips/fpxx.ll b/test/CodeGen/Mips/fpxx.ll
new file mode 100644
index 0000000000000..7e2ed22e2d805
--- /dev/null
+++ b/test/CodeGen/Mips/fpxx.ll
@@ -0,0 +1,221 @@
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-NOFPXX
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-FPXX
+
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NOFPXX
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FPXX
+
+; RUN: llc -march=mips64 -mcpu=mips4 < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-NOFPXX
+; RUN: not llc -march=mips64 -mcpu=mips4 -mattr=fpxx < %s 2>&1 | FileCheck %s -check-prefix=4-FPXX
+
+; RUN: llc -march=mips64 -mcpu=mips64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NOFPXX
+; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fpxx < %s 2>&1 | FileCheck %s -check-prefix=64-FPXX
+
+; RUN-TODO: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-NOFPXX
+; RUN-TODO: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-FPXX
+
+; RUN-TODO: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-NOFPXX
+; RUN-TODO: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-FPXX
+
+declare double @dbl();
+
+; 4-FPXX:  LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's.
+; 64-FPXX: LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's.
+
+define double @test1(double %d, ...) {
+  ret double %d
+
+; ALL-LABEL: test1:
+
+; 32-NOFPXX:     mtc1    $4, $f0
+; 32-NOFPXX:     mtc1    $5, $f1
+
+; 32-FPXX:       addiu   $sp, $sp, -8
+; 32-FPXX:       sw      $4, 0($sp)
+; 32-FPXX:       sw      $5, 4($sp)
+; 32-FPXX:       ldc1    $f0, 0($sp)
+
+; 32R2-NOFPXX:   mtc1    $4, $f0
+; 32R2-NOFPXX:   mthc1   $5, $f0
+
+; 32R2-FPXX:     mtc1    $4, $f0
+; 32R2-FPXX:     mthc1   $5, $f0
+
+; floats/doubles are not passed in integer registers for n64, so dmtc1 is not used.
+; 4-NOFPXX:      mov.d   $f0, $f12
+
+; 64-NOFPXX:     mov.d   $f0, $f12
+}
+
+define double @test2(i32 %i, double %d) {
+  ret double %d
+
+; ALL-LABEL: test2:
+
+; 32-NOFPXX:     mtc1    $6, $f0
+; 32-NOFPXX:     mtc1    $7, $f1
+
+; 32-FPXX:       addiu   $sp, $sp, -8
+; 32-FPXX:       sw      $6, 0($sp)
+; 32-FPXX:       sw      $7, 4($sp)
+; 32-FPXX:       ldc1    $f0, 0($sp)
+
+; 32R2-NOFPXX:   mtc1    $6, $f0
+; 32R2-NOFPXX:   mthc1   $7, $f0
+
+; 32R2-FPXX:     mtc1    $6, $f0
+; 32R2-FPXX:     mthc1   $7, $f0
+
+; 4-NOFPXX:      mov.d   $f0, $f13
+
+; 64-NOFPXX:     mov.d   $f0, $f13
+}
+
+define double @test3(float %f1, float %f2, double %d) {
+  ret double %d
+
+; ALL-LABEL: test3:
+
+; 32-NOFPXX:     mtc1    $6, $f0
+; 32-NOFPXX:     mtc1    $7, $f1
+
+; 32-FPXX:       addiu   $sp, $sp, -8
+; 32-FPXX:       sw      $6, 0($sp)
+; 32-FPXX:       sw      $7, 4($sp)
+; 32-FPXX:       ldc1    $f0, 0($sp)
+
+; 32R2-NOFPXX:   mtc1    $6, $f0
+; 32R2-NOFPXX:   mthc1   $7, $f0
+
+; 32R2-FPXX:     mtc1    $6, $f0
+; 32R2-FPXX:     mthc1   $7, $f0
+
+; 4-NOFPXX:      mov.d   $f0, $f14
+
+; 64-NOFPXX:     mov.d   $f0, $f14
+}
+
+define double @test4(float %f, double %d, ...) {
+  ret double %d
+
+; ALL-LABEL: test4:
+
+; 32-NOFPXX:     mtc1    $6, $f0
+; 32-NOFPXX:     mtc1    $7, $f1
+
+; 32-FPXX:       addiu   $sp, $sp, -8
+; 32-FPXX:       sw      $6, 0($sp)
+; 32-FPXX:       sw      $7, 4($sp)
+; 32-FPXX:       ldc1    $f0, 0($sp)
+
+; 32R2-NOFPXX:   mtc1    $6, $f0
+; 32R2-NOFPXX:   mthc1   $7, $f0
+
+; 32R2-FPXX:     mtc1    $6, $f0
+; 32R2-FPXX:     mthc1   $7, $f0
+
+; 4-NOFPXX:      mov.d   $f0, $f13
+
+; 64-NOFPXX:     mov.d   $f0, $f13
+}
+
+define double @test5() {
+  ret double 0.000000e+00
+
+; ALL-LABEL: test5:
+
+; 32-NOFPXX:     mtc1    $zero, $f0
+; 32-NOFPXX:     mtc1    $zero, $f1
+
+; 32-FPXX:       addiu   $sp, $sp, -8
+; 32-FPXX:       sw      $zero, 0($sp)
+; 32-FPXX:       sw      $zero, 4($sp)
+; 32-FPXX:       ldc1    $f0, 0($sp)
+
+; 32R2-NOFPXX:   mtc1    $zero, $f0
+; 32R2-NOFPXX:   mthc1   $zero, $f0
+
+; 32R2-FPXX:     mtc1    $zero, $f0
+; 32R2-FPXX:     mthc1   $zero, $f0
+
+; 4-NOFPXX:      dmtc1 $zero, $f0
+
+; 64-NOFPXX:     dmtc1 $zero, $f0
+}
+
+define double @test6(double %a, double %b, ...) {
+  %1 = fsub double %a, %b
+  ret double %1
+
+; ALL-LABEL:     test6:
+
+; 32-NOFPXX-DAG:     mtc1    $4, $[[T0:f[0-9]+]]
+; 32-NOFPXX-DAG:     mtc1    $5, ${{f[0-9]*[13579]}}
+; 32-NOFPXX-DAG:     mtc1    $6, $[[T1:f[0-9]+]]
+; 32-NOFPXX-DAG:     mtc1    $7, ${{f[0-9]*[13579]}}
+; 32-NOFPXX:         sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32-FPXX:           addiu   $sp, $sp, -8
+; 32-FPXX:           sw      $6, 0($sp)
+; 32-FPXX:           sw      $7, 4($sp)
+; 32-FPXX:           ldc1    $[[T1:f[0-9]+]], 0($sp)
+; 32-FPXX:           sw      $4, 0($sp)
+; 32-FPXX:           sw      $5, 4($sp)
+; 32-FPXX:           ldc1    $[[T0:f[0-9]+]], 0($sp)
+; 32-FPXX:           sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-NOFPXX-DAG:   mtc1    $4, $[[T0:f[0-9]+]]
+; 32R2-NOFPXX-DAG:   mthc1   $5, $[[T0]]
+; 32R2-NOFPXX-DAG:   mtc1    $6, $[[T1:f[0-9]+]]
+; 32R2-NOFPXX-DAG:   mthc1   $7, $[[T1]]
+; 32R2-NOFPXX:       sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-FPXX-DAG:     mtc1    $4, $[[T0:f[0-9]+]]
+; 32R2-FPXX-DAG:     mthc1   $5, $[[T0]]
+; 32R2-FPXX-DAG:     mtc1    $6, $[[T1:f[0-9]+]]
+; 32R2-FPXX-DAG:     mthc1   $7, $[[T1]]
+; 32R2-FPXX:         sub.d   $f0, $[[T0]], $[[T1]]
+
+; floats/doubles are not passed in integer registers for n64, so dmtc1 is not used.
+; 4-NOFPXX:          sub.d   $f0, $f12, $f13
+
+; floats/doubles are not passed in integer registers for n64, so dmtc1 is not used.
+; 64-NOFPXX:         sub.d   $f0, $f12, $f13
+}
+
+define double @move_from1(double %d) {
+  %1 = call double @dbl()
+  %2 = call double @test2(i32 0, double %1)
+  ret double %2
+
+; ALL-LABEL:   move_from1:
+
+; 32-NOFPXX-DAG:   mfc1    $6, $f0
+; 32-NOFPXX-DAG:   mfc1    $7, $f1
+
+; 32-FPXX:         addiu   $sp, $sp, -32
+; 32-FPXX:         sdc1    $f0, 16($sp)
+; 32-FPXX:         lw      $6, 16($sp)
+; FIXME: This store is redundant
+; 32-FPXX:         sdc1    $f0, 16($sp)
+; 32-FPXX:         lw      $7, 20($sp)
+
+; 32R2-NOFPXX-DAG: mfc1    $6, $f0
+; 32R2-NOFPXX-DAG: mfhc1   $7, $f0
+
+; 32R2-FPXX-DAG:   mfc1    $6, $f0
+; 32R2-FPXX-DAG:   mfhc1   $7, $f0
+
+; floats/doubles are not passed in integer registers for n64, so dmfc1 is not used.
+; We can't use inline assembly to force a copy either because trying to force
+; a copy to a GPR this way fails with ; "couldn't allocate input reg for
+; constraint 'r'". It therefore seems impossible to test the generation of dmfc1
+; in a simple test.
+; 4-NOFPXX:        mov.d   $f13, $f0
+
+; floats/doubles are not passed in integer registers for n64, so dmfc1 is not used.
+; We can't use inline assembly to force a copy either because trying to force
+; a copy to a GPR this way fails with ; "couldn't allocate input reg for
+; constraint 'r'". It therefore seems impossible to test the generation of dmfc1
+; in a simple test.
+; 64-NOFPXX:       mov.d   $f13, $f0
+}
diff --git a/test/CodeGen/Mips/global-address.ll b/test/CodeGen/Mips/global-address.ll
index 0d49a7424ad67..0785cfcc05150 100644
--- a/test/CodeGen/Mips/global-address.ll
+++ b/test/CodeGen/Mips/global-address.ll
@@ -1,9 +1,9 @@
 ; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
 ; RUN: llc -march=mipsel -relocation-model=static -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-O32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static  -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=static  -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
 
 @s1 = internal unnamed_addr global i32 8, align 4
 @g1 = external global i32
diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll
index 058a041c16a92..36f4ad6b55c0b 100644
--- a/test/CodeGen/Mips/helloworld.ll
+++ b/test/CodeGen/Mips/helloworld.ll
@@ -15,7 +15,7 @@ entry:
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
   ret i32 0
 
-; SR: 	.set	mips16                  # @main
+; SR: 	.set	mips16
 
 ; SR32: .set nomips16
 ; SR32: .ent main
@@ -25,10 +25,9 @@ entry:
 ; SR32:  .set noreorder
 ; SR32:  .set nomacro
 ; SR32:  .set noat
-; SR:	save 	$ra, $s0, $s1, $s2, [[FS:[0-9]+]]
+; SR:	save 	$ra, 24 # 16 bit inst
 ; PE:    .ent main
-; PE:    .align  2
-; PE-NEXT:	li	$[[T1:[0-9]+]], %hi(_gp_disp)
+; PE:	li	$[[T1:[0-9]+]], %hi(_gp_disp)
 ; PE-NEXT: 	addiu	$[[T2:[0-9]+]], $pc, %lo(_gp_disp)
 ; PE:	        sll	$[[T3:[0-9]+]], $[[T1]], 16
 ; C1:	lw	${{[0-9]+}}, %got($.str)(${{[0-9]+}})
@@ -37,7 +36,7 @@ entry:
 ; C2:	move	$25, ${{[0-9]+}}
 ; C1:	move 	$gp, ${{[0-9]+}}
 ; C1:	jalrc 	${{[0-9]+}}
-; SR:	restore 	$ra, $s0, $s1, $s2, [[FS]]
+; SR:	restore $ra,	24 # 16 bit inst
 ; PE:	li	$2, 0
 ; PE:	jrc 	$ra
 
diff --git a/test/CodeGen/Mips/hf16_1.ll b/test/CodeGen/Mips/hf16_1.ll
index c7454ee0a8dde..9879cd523af31 100644
--- a/test/CodeGen/Mips/hf16_1.ll
+++ b/test/CodeGen/Mips/hf16_1.ll
@@ -1,5 +1,5 @@
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=1
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=2
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=1
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=2
 
 
 @x = common global float 0.000000e+00, align 4
diff --git a/test/CodeGen/Mips/hf16call32.ll b/test/CodeGen/Mips/hf16call32.ll
index 461438e8bec06..aec9c71c485b9 100644
--- a/test/CodeGen/Mips/hf16call32.ll
+++ b/test/CodeGen/Mips/hf16call32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=stel
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=stel
 
 @x = common global float 0.000000e+00, align 4
 @y = common global float 0.000000e+00, align 4
diff --git a/test/CodeGen/Mips/hf16call32_body.ll b/test/CodeGen/Mips/hf16call32_body.ll
index 34bae26f85f3a..adac31460c44c 100644
--- a/test/CodeGen/Mips/hf16call32_body.ll
+++ b/test/CodeGen/Mips/hf16call32_body.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=stel
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=stel
 
 @x = external global float
 @xd = external global double
diff --git a/test/CodeGen/Mips/hf1_body.ll b/test/CodeGen/Mips/hf1_body.ll
index b2cce92aa1a42..5acfe86373d99 100644
--- a/test/CodeGen/Mips/hf1_body.ll
+++ b/test/CodeGen/Mips/hf1_body.ll
@@ -1,4 +1,4 @@
-; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16  -relocation-model=pic -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=picfp16
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16  -relocation-model=pic < %s | FileCheck %s -check-prefix=picfp16
 
 @x = external global float
 
diff --git a/test/CodeGen/Mips/hfptrcall.ll b/test/CodeGen/Mips/hfptrcall.ll
index 25639dad63a82..9df8d900693c0 100644
--- a/test/CodeGen/Mips/hfptrcall.ll
+++ b/test/CodeGen/Mips/hfptrcall.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=picel
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=picel
 
 @ptrsv = global float ()* @sv, align 4
 @ptrdv = global double ()* @dv, align 4
diff --git a/test/CodeGen/Mips/i32k.ll b/test/CodeGen/Mips/i32k.ll
index f4dd1eb78a1d1..73f1302beec0b 100644
--- a/test/CodeGen/Mips/i32k.ll
+++ b/test/CodeGen/Mips/i32k.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -mips16-constant-islands=false -O3 < %s | FileCheck %s -check-prefix=16
 
 @.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
 
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
index f9e53cbb07a48..c09108dc07447 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
@@ -9,7 +9,7 @@ define i32 @main() nounwind {
 entry:
 
 ;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'I'
-  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 1048576) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i32 7, i32 1048576) nounwind
   ret i32 0
 }
 
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
index 1fdf672fe1977..2b24b0f82c57f 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
@@ -10,7 +10,7 @@ entry:
 
 ;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'J'
 
-  tail call i32 asm "addi $0,$1,$2", "=r,r,J"(i32 1024, i32 3) nounwind
+  tail call i32 asm "addiu $0,$1,$2", "=r,r,J"(i32 1024, i32 3) nounwind
   ret i32 0
 }
 
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
index 49dcc8745857c..5edb3e24674e0 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
@@ -10,7 +10,7 @@ entry:
 
 ;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'L'
 
-  tail call i32 asm "addi $0,$1,$2", "=r,r,L"(i32 7, i32 1048579) nounwind
+  tail call i32 asm "addiu $0,$1,$2", "=r,r,L"(i32 7, i32 1048579) nounwind
   ret i32 0
 }
 
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
index 770669d913e80..eaa540acdafab 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
@@ -11,7 +11,7 @@ entry:
 
 ;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'N'
 
-  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 3) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,N"(i32 7, i32 3) nounwind
   ret i32 0
 }
 
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
index cd4431ac52650..56afbaaa9cd67 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
@@ -11,6 +11,6 @@ entry:
 
 ;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'O'
 
-  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 undef, i32 16384) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,O"(i32 undef, i32 16384) nounwind
   ret i32 0
 }
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
index 0a4739ebb96bd..0a55cb55e5f23 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
@@ -11,6 +11,6 @@ entry:
 
 ;CHECK-ERRORS:	error: invalid operand for inline asm constraint 'P'
 
-  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 undef, i32 655536) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,P"(i32 undef, i32 655536) nounwind
   ret i32 0
 }
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
index 94ded307fda96..a67ddce222aef 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
@@ -1,33 +1,34 @@
 ; Positive test for inline register constraints
 ;
-; RUN: llc -march=mipsel < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
 
 define i32 @main() nounwind {
 entry:
 
 ; r with char
 ;CHECK:	#APP
-;CHECK:	addi ${{[0-9]+}},${{[0-9]+}},23
+;CHECK:	addiu ${{[0-9]+}},${{[0-9]+}},23
 ;CHECK:	#NO_APP
-  tail call i8 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i8 27, i8 23) nounwind
+  tail call i8 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i8 27, i8 23) nounwind
 
 ; r with short
 ;CHECK:	#APP
-;CHECK:	addi ${{[0-9]+}},${{[0-9]+}},13
+;CHECK:	addiu ${{[0-9]+}},${{[0-9]+}},13
 ;CHECK:	#NO_APP
-  tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i16 17, i16 13) nounwind
+  tail call i16 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i16 17, i16 13) nounwind
 
 ; r with int
 ;CHECK:	#APP
-;CHECK:	addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK:	addiu ${{[0-9]+}},${{[0-9]+}},3
 ;CHECK:	#NO_APP
-  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i32 7, i32 3) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i32 7, i32 3) nounwind
 
 ; Now c with 1024: make sure register $25 is picked
 ; CHECK: #APP
-; CHECK: addi $25,${{[0-9]+}},1024
+; CHECK: addiu $25,${{[0-9]+}},1024
 ; CHECK: #NO_APP	
-   tail call i32 asm sideeffect "addi $0,$1,$2", "=c,c,I"(i32 4194304, i32 1024) nounwind
+   tail call i32 asm sideeffect "addiu $0,$1,$2", "=c,c,I"(i32 4194304, i32 1024) nounwind
 
 ; Now l with 1024: make sure register lo is picked. We do this by checking the instruction
 ; after the inline expression for a mflo to pull the value out of lo.
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
index 7870666025752..a7ba762b10649 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
@@ -12,9 +12,9 @@ entry:
 
 ; r with long long
 ;CHECK:	#APP
-;CHECK:	addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK:	addiu ${{[0-9]+}},${{[0-9]+}},3
 ;CHECK:	#NO_APP
-  tail call i64 asm sideeffect "addi $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind
+  tail call i64 asm sideeffect "addiu $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind
   ret i32 0
 }
 
diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll
index 7bb4adc31bd81..6512851a11beb 100644
--- a/test/CodeGen/Mips/inlineasm-operand-code.ll
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -12,9 +12,9 @@ define i32 @constraint_X() nounwind {
 entry:
 ;CHECK_LITTLE_32-LABEL:   constraint_X:
 ;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd
+;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd
 ;CHECK_LITTLE_32: #NO_APP
-  tail call i32 asm sideeffect "addi $0,$1,${2:X}", "=r,r,I"(i32 7, i32 -3) ;
+  tail call i32 asm sideeffect "addiu $0,$1,${2:X}", "=r,r,I"(i32 7, i32 -3) ;
   ret i32 0
 }
 
@@ -23,9 +23,9 @@ define i32 @constraint_x() nounwind {
 entry:
 ;CHECK_LITTLE_32-LABEL:   constraint_x:
 ;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffd
+;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},0xfffd
 ;CHECK_LITTLE_32: #NO_APP
-  tail call i32 asm sideeffect "addi $0,$1,${2:x}", "=r,r,I"(i32 7, i32 -3) ;
+  tail call i32 asm sideeffect "addiu $0,$1,${2:x}", "=r,r,I"(i32 7, i32 -3) ;
   ret i32 0
 }
 
@@ -34,9 +34,9 @@ define i32 @constraint_d() nounwind {
 entry:
 ;CHECK_LITTLE_32-LABEL:   constraint_d:
 ;CHECK_LITTLE_32:   #APP
-;CHECK_LITTLE_32:   addi ${{[0-9]+}},${{[0-9]+}},-3
+;CHECK_LITTLE_32:   addiu ${{[0-9]+}},${{[0-9]+}},-3
 ;CHECK_LITTLE_32:   #NO_APP
-  tail call i32 asm sideeffect "addi $0,$1,${2:d}", "=r,r,I"(i32 7, i32 -3) ;
+  tail call i32 asm sideeffect "addiu $0,$1,${2:d}", "=r,r,I"(i32 7, i32 -3) ;
   ret i32 0
 }
 
@@ -45,9 +45,9 @@ define i32 @constraint_m() nounwind {
 entry:
 ;CHECK_LITTLE_32-LABEL:   constraint_m:
 ;CHECK_LITTLE_32:   #APP
-;CHECK_LITTLE_32:   addi ${{[0-9]+}},${{[0-9]+}},-4
+;CHECK_LITTLE_32:   addiu ${{[0-9]+}},${{[0-9]+}},-4
 ;CHECK_LITTLE_32:   #NO_APP
-  tail call i32 asm sideeffect "addi $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) ;
+  tail call i32 asm sideeffect "addiu $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) ;
   ret i32 0
 }
 
@@ -56,15 +56,15 @@ define i32 @constraint_z() nounwind {
 entry:
 ;CHECK_LITTLE_32-LABEL: constraint_z:
 ;CHECK_LITTLE_32:    #APP
-;CHECK_LITTLE_32:    addi ${{[0-9]+}},${{[0-9]+}},-3
+;CHECK_LITTLE_32:    addiu ${{[0-9]+}},${{[0-9]+}},-3
 ;CHECK_LITTLE_32:    #NO_APP
-  tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) ;
+  tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) ;
 
 ; z with 0
 ;CHECK_LITTLE_32:    #APP
-;CHECK_LITTLE_32:    addi ${{[0-9]+}},${{[0-9]+}},$0
+;CHECK_LITTLE_32:    addiu ${{[0-9]+}},${{[0-9]+}},$0
 ;CHECK_LITTLE_32:    #NO_APP
-  tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
   ret i32 0
 }
 
@@ -73,9 +73,9 @@ define i32 @constraint_longlong() nounwind {
 entry:
 ;CHECK_LITTLE_32-LABEL: constraint_longlong:
 ;CHECK_LITTLE_32:    #APP
-;CHECK_LITTLE_32:    addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK_LITTLE_32:    addiu ${{[0-9]+}},${{[0-9]+}},3
 ;CHECK_LITTLE_32:    #NO_APP
-  tail call i64 asm sideeffect "addi $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind
+  tail call i64 asm sideeffect "addiu $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind
   ret i32 0
 }
 
diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll
index 8d30f45d84e3d..76b73dc276aef 100644
--- a/test/CodeGen/Mips/inlineasm_constraint.ll
+++ b/test/CodeGen/Mips/inlineasm_constraint.ll
@@ -5,21 +5,21 @@ entry:
 
 ; First I with short
 ; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},4096
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},4096
 ; CHECK: #NO_APP
-  tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i16 7, i16 4096) nounwind
+  tail call i16 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i16 7, i16 4096) nounwind
 
 ; Then I with int
 ; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3
 ; CHECK: #NO_APP
-   tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 -3) nounwind
+   tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i32 7, i32 -3) nounwind
 
 ; Now J with 0
 ; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},0
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},0
 ; CHECK: #NO_APP
-  tail call i32 asm sideeffect "addi $0,$1,$2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,$2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind
 
 ; Now K with 64
 ; CHECK: #APP
@@ -35,29 +35,29 @@ entry:
 
 ; Now N with -3
 ; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3
 ; CHECK: #NO_APP	
-  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 -3) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,N"(i32 7, i32 -3) nounwind
 
 ; Now O with -3
 ; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3
 ; CHECK: #NO_APP	
-  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 7, i16 -3) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,O"(i32 7, i16 -3) nounwind
 
 ; Now P with 65535
 ; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},65535
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},65535
 ; CHECK: #NO_APP	
-  tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
+  tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
 
 ; Now R Which takes the address of c
   %c = alloca i32, align 4
   store i32 -4469539, i32* %c, align 4
-  %8 = call i32 asm sideeffect "lwl $0, 1 + $1\0A\09lwr $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1
+  %8 = call i32 asm sideeffect "lw $0, 1 + $1\0A\09lw $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1
 ; CHECK: #APP
-; CHECK: lwl ${{[0-9]+}}, 1 + 0(${{[0-9]+}})
-; CHECK: lwr ${{[0-9]+}}, 2 + 0(${{[0-9]+}})
+; CHECK: lw ${{[0-9]+}}, 1 + 0(${{[0-9]+}})
+; CHECK: lw ${{[0-9]+}}, 2 + 0(${{[0-9]+}})
 ; CHECK: #NO_APP	
 
   ret i32 0
diff --git a/test/CodeGen/Mips/int-to-float-conversion.ll b/test/CodeGen/Mips/int-to-float-conversion.ll
index c2baf442f4ae1..d226b48cb20f3 100644
--- a/test/CodeGen/Mips/int-to-float-conversion.ll
+++ b/test/CodeGen/Mips/int-to-float-conversion.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefix=64
 ; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
 
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
diff --git a/test/CodeGen/Mips/l3mc.ll b/test/CodeGen/Mips/l3mc.ll
new file mode 100644
index 0000000000000..3bfb389ba05da
--- /dev/null
+++ b/test/CodeGen/Mips/l3mc.ll
@@ -0,0 +1,114 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck  %s -check-prefix=__call_stub_fp___fixunsdfsi 
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdidf 
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdisf 
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundidf
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=__call_stub_fp___fixsfdi 
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunsdfdi 
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=__call_stub_fp___fixdfdi
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfsi 
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfdi 
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundisf 
+
+@ll1 = global i64 0, align 8
+@ll2 = global i64 0, align 8
+@ll3 = global i64 0, align 8
+@l1 = global i32 0, align 4
+@l2 = global i32 0, align 4
+@l3 = global i32 0, align 4
+@ull1 = global i64 0, align 8
+@ull2 = global i64 0, align 8
+@ull3 = global i64 0, align 8
+@ul1 = global i32 0, align 4
+@ul2 = global i32 0, align 4
+@ul3 = global i32 0, align 4
+@d1 = global double 0.000000e+00, align 8
+@d2 = global double 0.000000e+00, align 8
+@d3 = global double 0.000000e+00, align 8
+@d4 = global double 0.000000e+00, align 8
+@f1 = global float 0.000000e+00, align 4
+@f2 = global float 0.000000e+00, align 4
+@f3 = global float 0.000000e+00, align 4
+@f4 = global float 0.000000e+00, align 4
+
+; Function Attrs: nounwind
+define void @_Z3foov() #0 {
+entry:
+  %0 = load double* @d1, align 8
+  %conv = fptosi double %0 to i64
+  store i64 %conv, i64* @ll1, align 8
+  %1 = load double* @d2, align 8
+  %conv1 = fptoui double %1 to i64
+  store i64 %conv1, i64* @ull1, align 8
+  %2 = load float* @f1, align 4
+  %conv2 = fptosi float %2 to i64
+  store i64 %conv2, i64* @ll2, align 8
+  %3 = load float* @f2, align 4
+  %conv3 = fptoui float %3 to i64
+  store i64 %conv3, i64* @ull2, align 8
+  %4 = load double* @d3, align 8
+  %conv4 = fptosi double %4 to i32
+  store i32 %conv4, i32* @l1, align 4
+  %5 = load double* @d4, align 8
+  %conv5 = fptoui double %5 to i32
+  store i32 %conv5, i32* @ul1, align 4
+  %6 = load float* @f3, align 4
+  %conv6 = fptosi float %6 to i32
+  store i32 %conv6, i32* @l2, align 4
+  %7 = load float* @f4, align 4
+  %conv7 = fptoui float %7 to i32
+  store i32 %conv7, i32* @ul2, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @_Z3goov() #0 {
+entry:
+  %0 = load i64* @ll1, align 8
+  %conv = sitofp i64 %0 to double
+  store double %conv, double* @d1, align 8
+  %1 = load i64* @ull1, align 8
+  %conv1 = uitofp i64 %1 to double
+  store double %conv1, double* @d2, align 8
+  %2 = load i64* @ll2, align 8
+  %conv2 = sitofp i64 %2 to float
+  store float %conv2, float* @f1, align 4
+  %3 = load i64* @ull2, align 8
+  %conv3 = uitofp i64 %3 to float
+  store float %conv3, float* @f2, align 4
+  %4 = load i32* @l1, align 4
+  %conv4 = sitofp i32 %4 to double
+  store double %conv4, double* @d3, align 8
+  %5 = load i32* @ul1, align 4
+  %conv5 = uitofp i32 %5 to double
+  store double %conv5, double* @d4, align 8
+  %6 = load i32* @l2, align 4
+  %conv6 = sitofp i32 %6 to float
+  store float %conv6, float* @f3, align 4
+  %7 = load i32* @ul2, align 4
+  %conv7 = uitofp i32 %7 to float
+  store float %conv7, float* @f4, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; __call_stub_fp___fixunsdfsi:  __call_stub_fp___fixunsdfsi:
+; __call_stub_fp___floatdidf:  __call_stub_fp___floatdidf:
+; __call_stub_fp___floatdisf:  __call_stub_fp___floatdisf:
+; __call_stub_fp___floatundidf:  __call_stub_fp___floatundidf:
+; __call_stub_fp___fixsfdi:  __call_stub_fp___fixsfdi:
+; __call_stub_fp___fixunsdfdi:  __call_stub_fp___fixunsdfdi:
+; __call_stub_fp___fixdfdi:  __call_stub_fp___fixdfdi:
+; __call_stub_fp___fixunssfsi:  __call_stub_fp___fixunssfsi:
+; __call_stub_fp___fixunssfdi:  __call_stub_fp___fixunssfdi:
+; __call_stub_fp___floatundisf:  __call_stub_fp___floatundisf:
+
diff --git a/test/CodeGen/Mips/largefr1.ll b/test/CodeGen/Mips/largefr1.ll
deleted file mode 100644
index 9a5fd08d17acc..0000000000000
--- a/test/CodeGen/Mips/largefr1.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=1
-
-
-@i = common global i32 0, align 4
-@j = common global i32 0, align 4
-@.str = private unnamed_addr constant [8 x i8] c"%i %i \0A\00", align 1
-
-define void @foo(i32* %p, i32 %i, i32 %j) nounwind {
-entry:
-  %p.addr = alloca i32*, align 4
-  %i.addr = alloca i32, align 4
-  %j.addr = alloca i32, align 4
-  store i32* %p, i32** %p.addr, align 4
-  store i32 %i, i32* %i.addr, align 4
-  store i32 %j, i32* %j.addr, align 4
-  %0 = load i32* %j.addr, align 4
-  %1 = load i32** %p.addr, align 4
-  %2 = load i32* %i.addr, align 4
-  %add.ptr = getelementptr inbounds i32* %1, i32 %2
-  store i32 %0, i32* %add.ptr, align 4
-  ret void
-}
-
-define i32 @main() nounwind {
-entry:
-; 1-LABEL: main:
-; 1: 1: 	.word	-798000
-; 1:            lw ${{[0-9]+}}, 1f
-; 1:            b 2f
-; 1:            .align 2
-; 1:            .word	800020
-
-; 1:            b 2f
-; 1:            .align 2
-; 1:            .word	400020
-
-; 1:            move ${{[0-9]+}}, $sp
-; 1:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
-; 1:            addiu ${{[0-9]+}}, ${{[0-9]+}}, 0
-
-
-
-; 1:            b 2f
-; 1:            .align 2
-; 1:            .word	400220
-
-; 1:            move ${{[0-9]+}}, $sp
-; 1:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
-; 1:           	lw	${{[0-9]+}}, 0(${{[0-9]+}})
-
-
-
-
-  %retval = alloca i32, align 4
-  %one = alloca [100000 x i32], align 4
-  %two = alloca [100000 x i32], align 4
-  store i32 0, i32* %retval
-  %arrayidx = getelementptr inbounds [100000 x i32]* %one, i32 0, i32 0
-  call void @foo(i32* %arrayidx, i32 50, i32 9999)
-  %arrayidx1 = getelementptr inbounds [100000 x i32]* %two, i32 0, i32 0
-  call void @foo(i32* %arrayidx1, i32 99999, i32 5555)
-  %arrayidx2 = getelementptr inbounds [100000 x i32]* %one, i32 0, i32 50
-  %0 = load i32* %arrayidx2, align 4
-  store i32 %0, i32* @i, align 4
-  %arrayidx3 = getelementptr inbounds [100000 x i32]* %two, i32 0, i32 99999
-  %1 = load i32* %arrayidx3, align 4
-  store i32 %1, i32* @j, align 4
-  %2 = load i32* @i, align 4
-  %3 = load i32* @j, align 4
-  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3)
-  ret i32 0
-}
-
-declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index 09fee3d9063fb..0e9c91fb46df4 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -1,6 +1,8 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | \
+; RUN:     FileCheck %s -check-prefix=64
 ; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | \
-; RUN: FileCheck %s -check-prefix=64
+; RUN:     FileCheck %s -check-prefix=64
 
 %struct.S1 = type { [65536 x i8] }
 
diff --git a/test/CodeGen/Mips/lcb2.ll b/test/CodeGen/Mips/lcb2.ll
new file mode 100644
index 0000000000000..715584b6797d2
--- /dev/null
+++ b/test/CodeGen/Mips/lcb2.ll
@@ -0,0 +1,133 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true   < %s | FileCheck %s -check-prefix=lcb
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true   < %s | FileCheck %s -check-prefix=lcbn
+
+@i = global i32 0, align 4
+@j = common global i32 0, align 4
+@k = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize
+define i32 @bnez() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !5
+  store i32 0, i32* @i, align 4, !tbaa !1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 0
+}
+; lcb: 	.ent	bnez
+; lcbn:	.ent	bnez
+; lcb:	bnez	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}}
+; lcbn-NOT: bnez	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}}  # 16 bit inst
+; lcb: 	.end	bnez
+; lcbn:	.end	bnez
+
+; Function Attrs: nounwind optsize
+define i32 @beqz() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 10, i32* @j, align 4, !tbaa !1
+  tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !6
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 55, i32* @j, align 4, !tbaa !1
+  tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !7
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret i32 0
+}
+
+; lcb: 	.ent	beqz
+; lcbn:	.ent	beqz
+; lcb:	beqz	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}}
+; lcbn-NOT: beqz	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}}  # 16 bit inst
+; lcb: 	.end	beqz
+; lcbn:	.end	beqz
+
+
+; Function Attrs: nounwind optsize
+define void @bteqz() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %1 = load i32* @j, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* @k, align 4, !tbaa !1
+  tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !8
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !9
+  store i32 2, i32* @k, align 4, !tbaa !1
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+; lcb: 	.ent	bteqz
+; lcbn:	.ent	bteqz
+; lcb:	btnez	$BB{{[0-9]+}}_{{[0-9]+}}
+; lcbn-NOT: btnez	$BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst
+; lcb: 	.end	bteqz
+; lcbn:	.end	bteqz
+
+
+; Function Attrs: nounwind optsize
+define void @btz() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %1 = load i32* @j, align 4, !tbaa !1
+  %cmp1 = icmp sgt i32 %0, %1
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry, %if.then
+  tail call void asm sideeffect ".space 60000", ""() #1, !srcloc !10
+  %2 = load i32* @i, align 4, !tbaa !1
+  %3 = load i32* @j, align 4, !tbaa !1
+  %cmp = icmp sgt i32 %2, %3
+  br i1 %cmp, label %if.then, label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; lcb: 	.ent	btz
+; lcbn:	.ent	btz
+; lcb:	bteqz	$BB{{[0-9]+}}_{{[0-9]+}}
+; lcbn-NOT: bteqz	$BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst
+; lcb:	btnez	$BB{{[0-9]+}}_{{[0-9]+}}
+; lcbn-NOT: btnez	$BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst
+; lcb: 	.end	btz
+; lcbn:	.end	btz
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5 (gitosis@dmz-portal.mips.com:clang.git ed197d08c90d82e1119774e10920e6f7a841c8ec) (gitosis@dmz-portal.mips.com:llvm.git b9235a363fa2dddb26ac01cbaed58efbc9eff392)"}
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"int", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
+!5 = metadata !{i32 59}
+!6 = metadata !{i32 156}
+!7 = metadata !{i32 210}
+!8 = metadata !{i32 299}
+!9 = metadata !{i32 340}
+!10 = metadata !{i32 412}
diff --git a/test/CodeGen/Mips/lcb3c.ll b/test/CodeGen/Mips/lcb3c.ll
new file mode 100644
index 0000000000000..72a0b8cf5cea5
--- /dev/null
+++ b/test/CodeGen/Mips/lcb3c.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -O0    < %s | FileCheck %s -check-prefix=lcb
+
+@i = global i32 0, align 4
+@j = common global i32 0, align 4
+@k = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @s() #0 {
+entry:
+  %0 = load i32* @i, align 4
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* @i, align 4
+  call void asm sideeffect ".space 1000", ""() #1, !srcloc !1
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 1, i32* @i, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret i32 0
+; lcb:	bnez	$2, $BB0_2
+; lcb:	b	$BB0_1 # 16 bit inst
+; lcb: $BB0_1:                                 # %if.then
+}
+
+; Function Attrs: nounwind
+define i32 @b() #0 {
+entry:
+  %0 = load i32* @i, align 4
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* @i, align 4
+  call void asm sideeffect ".space 1000000", ""() #1, !srcloc !2
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 1, i32* @i, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret i32 0
+}
+
+; lcb:	beqz	$2, $BB1_1  # 16 bit inst
+; lcb:	jal	$BB1_2	# branch
+; lcb: $BB1_1:                                 # %if.then
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+
+!1 = metadata !{i32 65}
+!2 = metadata !{i32 167}
diff --git a/test/CodeGen/Mips/lcb4a.ll b/test/CodeGen/Mips/lcb4a.ll
new file mode 100644
index 0000000000000..e37feca781794
--- /dev/null
+++ b/test/CodeGen/Mips/lcb4a.ll
@@ -0,0 +1,69 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static     < %s | FileCheck %s -check-prefix=ci
+
+@i = global i32 0, align 4
+@j = common global i32 0, align 4
+@k = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize
+define i32 @foo() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !5
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 1004", ""() #1, !srcloc !6
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %storemerge = phi i32 [ 1, %if.else ], [ 0, %if.then ]
+  store i32 %storemerge, i32* @i, align 4, !tbaa !1
+  ret i32 0
+}
+
+; ci:	beqz	$3, $BB0_2
+; ci: # BB#1:                                 # %if.else
+
+
+; Function Attrs: nounwind optsize
+define i32 @goo() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 1000000", ""() #1, !srcloc !7
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 1000004", ""() #1, !srcloc !8
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %storemerge = phi i32 [ 1, %if.else ], [ 0, %if.then ]
+  store i32 %storemerge, i32* @i, align 4, !tbaa !1
+  ret i32 0
+}
+
+; ci:	bnez	$3, $BB1_1  # 16 bit inst
+; ci:	jal	$BB1_2	# branch
+; ci:	nop
+; ci: $BB1_1:                                 # %if.else
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"int", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
+!5 = metadata !{i32 58}
+!6 = metadata !{i32 108}
+!7 = metadata !{i32 190}
+!8 = metadata !{i32 243}
diff --git a/test/CodeGen/Mips/lcb5.ll b/test/CodeGen/Mips/lcb5.ll
new file mode 100644
index 0000000000000..0a89c804945f1
--- /dev/null
+++ b/test/CodeGen/Mips/lcb5.ll
@@ -0,0 +1,240 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static     < %s | FileCheck %s -check-prefix=ci
+
+@i = global i32 0, align 4
+@j = common global i32 0, align 4
+@k = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize
+define i32 @x0() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !5
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 1004", ""() #1, !srcloc !6
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %storemerge = phi i32 [ 1, %if.else ], [ 0, %if.then ]
+  store i32 %storemerge, i32* @i, align 4, !tbaa !1
+  ret i32 0
+}
+
+; ci:	.ent	x0
+; ci: 	beqz	$3, $BB0_2
+; ci: $BB0_2:
+; ci:	.end	x0
+
+; Function Attrs: nounwind optsize
+define i32 @x1() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 1000000", ""() #1, !srcloc !7
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 1000004", ""() #1, !srcloc !8
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %storemerge = phi i32 [ 1, %if.else ], [ 0, %if.then ]
+  store i32 %storemerge, i32* @i, align 4, !tbaa !1
+  ret i32 0
+}
+
+; ci:	.ent	x1
+; ci:	bnez	$3, $BB1_1  # 16 bit inst
+; ci:	jal	$BB1_2	# branch
+; ci:	nop
+; ci: $BB1_1:
+; ci:	.end	x1
+
+; Function Attrs: nounwind optsize
+define i32 @y0() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 10, i32* @j, align 4, !tbaa !1
+  tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !9
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 55, i32* @j, align 4, !tbaa !1
+  tail call void asm sideeffect ".space 1004", ""() #1, !srcloc !10
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret i32 0
+}
+
+; ci:	.ent	y0
+; ci:	beqz	$2, $BB2_2
+; ci:	.end	y0
+
+; Function Attrs: nounwind optsize
+define i32 @y1() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 10, i32* @j, align 4, !tbaa !1
+  tail call void asm sideeffect ".space 1000000", ""() #1, !srcloc !11
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 55, i32* @j, align 4, !tbaa !1
+  tail call void asm sideeffect ".space 1000004", ""() #1, !srcloc !12
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret i32 0
+}
+
+; ci:	.ent	y1
+; ci:	bnez	$2, $BB3_1  # 16 bit inst
+; ci:	jal	$BB3_2	# branch
+; ci:	nop
+; ci: $BB3_1:
+; ci:	.end	y1
+
+; Function Attrs: nounwind optsize
+define void @z0() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %1 = load i32* @j, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* @k, align 4, !tbaa !1
+  tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !13
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 10004", ""() #1, !srcloc !14
+  store i32 2, i32* @k, align 4, !tbaa !1
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+; ci:	.ent	z0
+; ci:	btnez	$BB4_2
+; ci:	.end	z0
+
+; Function Attrs: nounwind optsize
+define void @z1() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %1 = load i32* @j, align 4, !tbaa !1
+  %cmp = icmp eq i32 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* @k, align 4, !tbaa !1
+  tail call void asm sideeffect ".space 10000000", ""() #1, !srcloc !15
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void asm sideeffect ".space 10000004", ""() #1, !srcloc !16
+  store i32 2, i32* @k, align 4, !tbaa !1
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+; ci:	.ent	z1
+; ci:	bteqz	$BB5_1  # 16 bit inst
+; ci:	jal	$BB5_2	# branch
+; ci:	nop
+; ci: $BB5_1:
+; ci:	.end	z1
+
+; Function Attrs: nounwind optsize
+define void @z3() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %1 = load i32* @j, align 4, !tbaa !1
+  %cmp1 = icmp sgt i32 %0, %1
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry, %if.then
+  tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !17
+  %2 = load i32* @i, align 4, !tbaa !1
+  %3 = load i32* @j, align 4, !tbaa !1
+  %cmp = icmp sgt i32 %2, %3
+  br i1 %cmp, label %if.then, label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; ci:	.ent	z3
+; ci:	bteqz	$BB6_2
+; ci:	.end	z3
+
+; Function Attrs: nounwind optsize
+define void @z4() #0 {
+entry:
+  %0 = load i32* @i, align 4, !tbaa !1
+  %1 = load i32* @j, align 4, !tbaa !1
+  %cmp1 = icmp sgt i32 %0, %1
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry, %if.then
+  tail call void asm sideeffect ".space 10000000", ""() #1, !srcloc !18
+  %2 = load i32* @i, align 4, !tbaa !1
+  %3 = load i32* @j, align 4, !tbaa !1
+  %cmp = icmp sgt i32 %2, %3
+  br i1 %cmp, label %if.then, label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; ci:	.ent	z4
+; ci:	btnez	$BB7_1  # 16 bit inst
+; ci:	jal	$BB7_2	# branch
+; ci:	nop
+; ci:	.align	2
+; ci: $BB7_1:
+; ci:	.end	z4
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"int", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
+!5 = metadata !{i32 57}
+!6 = metadata !{i32 107}
+!7 = metadata !{i32 188}
+!8 = metadata !{i32 241}
+!9 = metadata !{i32 338}
+!10 = metadata !{i32 391}
+!11 = metadata !{i32 477}
+!12 = metadata !{i32 533}
+!13 = metadata !{i32 621}
+!14 = metadata !{i32 663}
+!15 = metadata !{i32 747}
+!16 = metadata !{i32 792}
+!17 = metadata !{i32 867}
+!18 = metadata !{i32 953}
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
index 1fa54b428cd97..a3183a25afaa9 100644
--- a/test/CodeGen/Mips/lit.local.cfg
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Mips' in targets:
+if not 'Mips' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Mips/llvm-ir/call.ll b/test/CodeGen/Mips/llvm-ir/call.ll
new file mode 100644
index 0000000000000..4cbf43cae28e4
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/call.ll
@@ -0,0 +1,166 @@
+; Test the 'call' instruction and the tailcall variant.
+
+; FIXME: We should remove the need for -enable-mips-tail-calls
+; RUN: llc -march=mips   -mcpu=mips32   -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+; RUN: llc -march=mips   -mcpu=mips32r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+; RUN: llc -march=mips   -mcpu=mips32r6 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+; RUN: llc -march=mips64 -mcpu=mips4    -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+; RUN: llc -march=mips64 -mcpu=mips64   -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+; RUN: llc -march=mips64 -mcpu=mips64r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+; RUN: llc -march=mips64 -mcpu=mips64r6 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+
+declare void @extern_void_void()
+declare i32 @extern_i32_void()
+declare float @extern_float_void()
+
+define i32 @call_void_void() {
+; ALL-LABEL: call_void_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
+
+; ALL:           jalr $[[TGT]]
+
+  call void @extern_void_void()
+  ret i32 0
+}
+
+define i32 @call_i32_void() {
+; ALL-LABEL: call_i32_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
+
+; ALL:           jalr $[[TGT]]
+
+  %1 = call i32 @extern_i32_void()
+  %2 = add i32 %1, 1
+  ret i32 %2
+}
+
+define float @call_float_void() {
+; ALL-LABEL: call_float_void:
+
+; FIXME: Not sure why we don't use $gp directly on such a simple test. We should
+;        look into it at some point.
+; O32:           addu $[[GP:[0-9]+]], ${{[0-9]+}}, $25
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_float_void)($[[GP]])
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_float_void)($gp)
+
+; ALL:           jalr $[[TGT]]
+
+; O32:           move $gp, $[[GP]]
+
+  %1 = call float @extern_float_void()
+  %2 = fadd float %1, 1.0
+  ret float %2
+}
+
+define void @musttail_call_void_void() {
+; ALL-LABEL: musttail_call_void_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
+
+; NOT-R6:        jr $[[TGT]]
+; R6:            r6.jr $[[TGT]]
+
+  musttail call void @extern_void_void()
+  ret void
+}
+
+define i32 @musttail_call_i32_void() {
+; ALL-LABEL: musttail_call_i32_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
+
+; NOT-R6:        jr $[[TGT]]
+; R6:            r6.jr $[[TGT]]
+
+  %1 = musttail call i32 @extern_i32_void()
+  ret i32 %1
+}
+
+define float @musttail_call_float_void() {
+; ALL-LABEL: musttail_call_float_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_float_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_float_void)($gp)
+
+; NOT-R6:        jr $[[TGT]]
+; R6:            r6.jr $[[TGT]]
+
+  %1 = musttail call float @extern_float_void()
+  ret float %1
+}
+
+define i32 @indirect_call_void_void(void ()* %addr) {
+; ALL-LABEL: indirect_call_void_void:
+
+; ALL:           move $25, $4
+; ALL:           jalr $25
+
+  call void %addr()
+  ret i32 0
+}
+
+define i32 @indirect_call_i32_void(i32 ()* %addr) {
+; ALL-LABEL: indirect_call_i32_void:
+
+; ALL:           move $25, $4
+; ALL:           jalr $25
+
+  %1 = call i32 %addr()
+  %2 = add i32 %1, 1
+  ret i32 %2
+}
+
+define float @indirect_call_float_void(float ()* %addr) {
+; ALL-LABEL: indirect_call_float_void:
+
+; ALL:           move $25, $4
+; ALL:           jalr $25
+
+  %1 = call float %addr()
+  %2 = fadd float %1, 1.0
+  ret float %2
+}
+
+; We can't use 'musttail' here because the verifier is too conservative and
+; prohibits any prototype difference.
+define void @tail_indirect_call_void_void(void ()* %addr) {
+; ALL-LABEL: tail_indirect_call_void_void:
+
+; ALL:           move $25, $4
+; ALL:           jr $25
+
+  tail call void %addr()
+  ret void
+}
+
+define i32 @tail_indirect_call_i32_void(i32 ()* %addr) {
+; ALL-LABEL: tail_indirect_call_i32_void:
+
+; ALL:           move $25, $4
+; ALL:           jr $25
+
+  %1 = tail call i32 %addr()
+  ret i32 %1
+}
+
+define float @tail_indirect_call_float_void(float ()* %addr) {
+; ALL-LABEL: tail_indirect_call_float_void:
+
+; ALL:           move $25, $4
+; ALL:           jr $25
+
+  %1 = tail call float %addr()
+  ret float %1
+}
diff --git a/test/CodeGen/Mips/llvm-ir/indirectbr.ll b/test/CodeGen/Mips/llvm-ir/indirectbr.ll
new file mode 100644
index 0000000000000..d8fd78774553c
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/indirectbr.ll
@@ -0,0 +1,34 @@
+; Test all important variants of the unconditional 'br' instruction.
+
+; RUN: llc -march=mips   -mcpu=mips32   -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips   -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips   -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=R6
+; RUN: llc -march=mips64 -mcpu=mips4    -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64   -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=R6
+
+define i32 @br(i8 *%addr) {
+; ALL-LABEL: br:
+; NOT-R6:        jr $4 # <MCInst #{{[0-9]+}} JR
+; R6:            jr $4 # <MCInst #{{[0-9]+}} JALR
+
+; ALL: $BB0_1: # %L1
+; NOT-R6:        jr $ra # <MCInst #{{[0-9]+}} JR
+; R6:            jr $ra # <MCInst #{{[0-9]+}} JALR
+; ALL:           addiu $2, $zero, 0
+
+; ALL: $BB0_2: # %L2
+; NOT-R6:        jr $ra # <MCInst #{{[0-9]+}} JR
+; R6:            jr $ra # <MCInst #{{[0-9]+}} JALR
+; ALL:           addiu $2, $zero, 1
+
+entry:
+  indirectbr i8* %addr, [label %L1, label %L2]
+
+L1:
+  ret i32 0
+
+L2:
+  ret i32 1
+}
diff --git a/test/CodeGen/Mips/llvm-ir/ret.ll b/test/CodeGen/Mips/llvm-ir/ret.ll
new file mode 100644
index 0000000000000..8f5b1159760c4
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/ret.ll
@@ -0,0 +1,205 @@
+; Test all important variants of the 'ret' instruction.
+;
+; For non-void returns it is necessary to have something to return so we also
+; test constant generation here.
+;
+; We'll test pointer returns in a separate file since the relocation model
+; affects it and it's undesirable to repeat the non-pointer returns for each
+; relocation model.
+
+; RUN: llc -march=mips   -mcpu=mips32   -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=NO-MTHC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips   -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=MTHC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips   -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=MTHC1 -check-prefix=R6
+; RUN: llc -march=mips64 -mcpu=mips4    -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64   -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=R6
+
+define void @ret_void() {
+; ALL-LABEL: ret_void:
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret void
+}
+
+define i8 @ret_i8() {
+; ALL-LABEL: ret_i8:
+; ALL-DAG:       addiu $2, $zero, 3
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i8 3
+}
+
+define i16 @ret_i16_3() {
+; ALL-LABEL: ret_i16_3:
+; ALL-DAG:       addiu $2, $zero, 3
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i16 3
+}
+
+define i16 @ret_i16_256() {
+; ALL-LABEL: ret_i16_256:
+; ALL-DAG:       addiu $2, $zero, 256
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i16 256
+}
+
+define i16 @ret_i16_257() {
+; ALL-LABEL: ret_i16_257:
+; ALL-DAG:       addiu $2, $zero, 257
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i16 257
+}
+
+define i32 @ret_i32_257() {
+; ALL-LABEL: ret_i32_257:
+; ALL-DAG:       addiu $2, $zero, 257
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i32 257
+}
+
+define i32 @ret_i32_65536() {
+; ALL-LABEL: ret_i32_65536:
+; ALL-DAG:       lui $2, 1
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i32 65536
+}
+
+define i32 @ret_i32_65537() {
+; ALL-LABEL: ret_i32_65537:
+; ALL:           lui $[[T0:[0-9]+]], 1
+; ALL-DAG:       ori $2, $[[T0]], 1
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i32 65537
+}
+
+define i64 @ret_i64_65537() {
+; ALL-LABEL: ret_i64_65537:
+; ALL:           lui $[[T0:[0-9]+]], 1
+
+; GPR32-DAG:     ori $3, $[[T0]], 1
+; GPR32-DAG:     addiu $2, $zero, 0
+
+; GPR64-DAG:     daddiu $2, $[[T0]], 1
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i64 65537
+}
+
+define i64 @ret_i64_281479271677952() {
+; ALL-LABEL: ret_i64_281479271677952:
+; ALL-DAG:       lui $[[T0:[0-9]+]], 1
+
+; GPR32-DAG:     ori $2, $[[T0]], 1
+; GPR32-DAG:     addiu $3, $zero, 0
+
+; GPR64-DAG:     daddiu $[[T1:[0-9]+]], $[[T0]], 1
+; GPR64-DAG:     dsll $2, $[[T1]], 32
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i64 281479271677952
+}
+
+define i64 @ret_i64_281479271809026() {
+; ALL-LABEL: ret_i64_281479271809026:
+; GPR32-DAG:     lui $[[T0:[0-9]+]], 1
+; GPR32-DAG:     lui $[[T1:[0-9]+]], 2
+; GPR32-DAG:     ori $2, $[[T0]], 1
+; GPR32-DAG:     ori $3, $[[T1]], 2
+
+; GPR64-DAG:     ori  $[[T0:[0-9]+]], $zero, 32769
+; GPR64-DAG:     dsll $[[T1:[0-9]+]], $[[T0]], 16
+; GPR64-DAG:     daddiu $[[T0:[0-9]+]], $[[T0]], -32767
+; GPR64-DAG:     dsll $[[T1:[0-9]+]], $[[T0]], 17
+; GPR64-DAG:     daddiu $2, $[[T1]], 2
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i64 281479271809026
+}
+
+define float @ret_float_0x0() {
+; ALL-LABEL: ret_float_0x0:
+
+; NO-MTHC1-DAG:  mtc1 $zero, $f0
+
+; MTHC1-DAG:     mtc1 $zero, $f0
+
+; DMTC-DAG:      dmtc1 $zero, $f0
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret float 0x0000000000000000
+}
+
+define float @ret_float_0x3() {
+; ALL-LABEL: ret_float_0x3:
+
+; Use a constant pool
+; O32-DAG:       lwc1 $f0, %lo($CPI
+; N64-DAG:       lwc1 $f0, %got_ofst($CPI
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+; float constants are written as double constants
+  ret float 0x36b8000000000000
+}
+
+define double @ret_double_0x0() {
+; ALL-LABEL: ret_double_0x0:
+
+; NO-MTHC1-DAG:  mtc1 $zero, $f0
+; NO-MTHC1-DAG:  mtc1 $zero, $f1
+
+; MTHC1-DAG:     mtc1 $zero, $f0
+; MTHC1-DAG:     mthc1 $zero, $f0
+
+; DMTC-DAG:      dmtc1 $zero, $f0
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret double 0x0000000000000000
+}
+
+define double @ret_double_0x3() {
+; ALL-LABEL: ret_double_0x3:
+
+; Use a constant pool
+; O32-DAG:       ldc1 $f0, %lo($CPI
+; N64-DAG:       ldc1 $f0, %got_ofst($CPI
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret double 0x0000000000000003
+}
diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll
index d0928ee266131..a3f5ebfb54607 100644
--- a/test/CodeGen/Mips/load-store-left-right.ll
+++ b/test/CodeGen/Mips/load-store-left-right.ll
@@ -1,29 +1,439 @@
-; RUN: llc -march=mipsel < %s | FileCheck  -check-prefix=EL %s
-; RUN: llc -march=mips < %s | FileCheck  -check-prefix=EB %s
+; RUN: llc -march=mipsel   -mcpu=mips32              < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EL %s
+; RUN: llc -march=mips     -mcpu=mips32              < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EB %s
+; RUN: llc -march=mipsel   -mcpu=mips32r2            < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EL %s
+; RUN: llc -march=mips     -mcpu=mips32r2            < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EB %s
+; RUN: llc -march=mipsel   -mcpu=mips32r6            < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EL %s
+; RUN: llc -march=mips     -mcpu=mips32r6            < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EB %s
+; RUN: llc -march=mips64el -mcpu=mips4    -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64   -mcpu=mips4    -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64   -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64   -mcpu=mips64   -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64   -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EL %s
+; RUN: llc -march=mips64   -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EB %s
 
+%struct.SLL = type { i64 }
 %struct.SI = type { i32 }
+%struct.SUI = type { i32 }
 
+@sll = common global %struct.SLL zeroinitializer, align 1
 @si = common global %struct.SI zeroinitializer, align 1
+@sui = common global %struct.SUI zeroinitializer, align 1
 
-define i32 @foo_load_i() nounwind readonly {
+define i32 @load_SI() nounwind readonly {
 entry:
-; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: lwr $[[R0]], 0($[[R1]])
-; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: lwr $[[R0]], 3($[[R1]])
+; ALL-LABEL: load_SI:
+
+; MIPS32-EL:     lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL:     lwr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6:      lw $2, 0($[[PTR]])
+
+; MIPS64-EL:     lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL:     lwr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6:      lw $2, 0($[[PTR]])
 
   %0 = load i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
   ret i32 %0
 }
 
-define void @foo_store_i(i32 %a) nounwind {
+define void @store_SI(i32 %a) nounwind {
 entry:
-; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: swr $[[R0]], 0($[[R1]])
-; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: swr $[[R0]], 3($[[R1]])
+; ALL-LABEL: store_SI:
+
+; MIPS32-EL:     swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL:     swr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB:     swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB:     swr $[[R0]], 3($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6:      sw $4, 0($[[PTR]])
+
+; MIPS64-EL:     swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL:     swr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     swr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6:      sw $4, 0($[[PTR]])
 
   store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
   ret void
 }
 
+define i64 @load_SLL() nounwind readonly {
+entry:
+; ALL-LABEL: load_SLL:
+
+; MIPS32-EL:     lwl $2, 3($[[R1:[0-9]+]])
+; MIPS32-EL:     lwr $2, 0($[[R1]])
+; MIPS32-EL:     lwl $3, 7($[[R1:[0-9]+]])
+; MIPS32-EL:     lwr $3, 4($[[R1]])
+
+; MIPS32-EB:     lwl $2, 0($[[R1:[0-9]+]])
+; MIPS32-EB:     lwr $2, 3($[[R1]])
+; MIPS32-EB:     lwl $3, 4($[[R1:[0-9]+]])
+; MIPS32-EB:     lwr $3, 7($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(sll)(
+; MIPS32R6-DAG:  lw $2, 0($[[PTR]])
+; MIPS32R6-DAG:  lw $3, 4($[[PTR]])
+
+; MIPS64-EL:     ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; MIPS64-EL:     ldr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     ldr $[[R0]], 7($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(sll)(
+; MIPS64R6:      ld $2, 0($[[PTR]])
+
+  %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+  ret i64 %0
+}
+
+define i64 @load_SI_sext_to_i64() nounwind readonly {
+entry:
+; ALL-LABEL: load_SI_sext_to_i64:
+
+; MIPS32-EL:     lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL:     lwr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6-EL:   lw $2, 0($[[PTR]])
+; MIPS32R6-EL:   sra $3, $2, 31
+; MIPS32R6-EB:   lw $3, 0($[[PTR]])
+; MIPS32R6-EB:   sra $2, $3, 31
+
+; MIPS64-EL:     lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL:     lwr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6:      lw $2, 0($[[PTR]])
+
+  %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+  %conv = sext i32 %0 to i64
+  ret i64 %conv
+}
+
+define i64 @load_UI() nounwind readonly {
+entry:
+; ALL-LABEL: load_UI:
+
+; MIPS32-EL-DAG: lwl $[[R2:2]], 3($[[R1:[0-9]+]])
+; MIPS32-EL-DAG: lwr $[[R2]],   0($[[R1]])
+; MIPS32-EL-DAG: addiu $3, $zero, 0
+
+; MIPS32-EB-DAG: lwl $[[R2:3]], 0($[[R1:[0-9]+]])
+; MIPS32-EB-DAG: lwr $[[R2]],   3($[[R1]])
+; MIPS32-EB-DAG: addiu $2, $zero, 0
+
+; MIPS32R6:        lw $[[PTR:[0-9]+]], %got(sui)(
+; MIPS32R6-EL-DAG: lw $2, 0($[[PTR]])
+; MIPS32R6-EL-DAG: addiu $3, $zero, 0
+; MIPS32R6-EB-DAG: lw $3, 0($[[PTR]])
+; MIPS32R6-EB-DAG: addiu $2, $zero, 0
+
+; MIPS64-EL-DAG: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL-DAG: lwr $[[R0]], 0($[[R1]])
+; MIPS64-EL-DAG: daddiu $[[R2:[0-9]+]], $zero, 1
+; MIPS64-EL-DAG: dsll   $[[R3:[0-9]+]], $[[R2]], 32
+; MIPS64-EL-DAG: daddiu $[[R4:[0-9]+]], $[[R3]], -1
+; MIPS64-EL-DAG: and    ${{[0-9]+}}, $[[R0]], $[[R4]]
+
+; MIPS64-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(sui)(
+; MIPS64R6:      lwu $2, 0($[[PTR]])
+
+  %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
+  %conv = zext i32 %0 to i64
+  ret i64 %conv
+}
+
+define void @store_SLL(i64 %a) nounwind {
+entry:
+; ALL-LABEL: store_SLL:
+
+; MIPS32-EL-DAG: swl $[[A1:4]], 3($[[R1:[0-9]+]])
+; MIPS32-EL-DAG: swr $[[A1]],   0($[[R1]])
+; MIPS32-EL-DAG: swl $[[A2:5]], 7($[[R1:[0-9]+]])
+; MIPS32-EL-DAG: swr $[[A2]],   4($[[R1]])
+
+; MIPS32-EB-DAG: swl $[[A1:4]], 0($[[R1:[0-9]+]])
+; MIPS32-EB-DAG: swr $[[A1]],   3($[[R1]])
+; MIPS32-EB-DAG: swl $[[A1:5]], 4($[[R1:[0-9]+]])
+; MIPS32-EB-DAG: swr $[[A1]],   7($[[R1]])
+
+; MIPS32R6-DAG:  lw $[[PTR:[0-9]+]], %got(sll)(
+; MIPS32R6-DAG:  sw $4, 0($[[PTR]])
+; MIPS32R6-DAG:  sw $5, 4($[[PTR]])
+
+; MIPS64-EL:     sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; MIPS64-EL:     sdr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     sdr $[[R0]], 7($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(sll)(
+; MIPS64R6:      sd $4, 0($[[PTR]])
+
+  store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+  ret void
+}
+
+define void @store_SI_trunc_from_i64(i32 %a) nounwind {
+entry:
+; ALL-LABEL: store_SI_trunc_from_i64:
+
+; MIPS32-EL:     swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL:     swr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB:     swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB:     swr $[[R0]], 3($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6:      sw $4, 0($[[PTR]])
+
+; MIPS64-EL:     swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL:     swr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     swr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6:      sw $4, 0($[[PTR]])
+
+  store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+  ret void
+}
+
+;
+; Structures are simply concatenations of the members. They are unaffected by
+; endianness
+;
+
+%struct.S0 = type { i8, i8 }
+@struct_s0 = common global %struct.S0 zeroinitializer, align 1
+%struct.S1 = type { i16, i16 }
+@struct_s1 = common global %struct.S1 zeroinitializer, align 1
+%struct.S2 = type { i32, i32 }
+@struct_s2 = common global %struct.S2 zeroinitializer, align 1
+
+define void @copy_struct_S0() nounwind {
+entry:
+; ALL-LABEL: copy_struct_S0:
+
+; MIPS32-EL:     lw $[[PTR:[0-9]+]], %got(struct_s0)(
+; MIPS32-EB:     lw $[[PTR:[0-9]+]], %got(struct_s0)(
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(struct_s0)(
+; MIPS64-EL:     ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
+; MIPS64-EB:     ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
+
+; FIXME: We should be able to do better than this on MIPS32r6/MIPS64r6 since
+;        we have unaligned halfword load/store available
+; ALL-DAG:       lbu $[[R1:[0-9]+]], 0($[[PTR]])
+; ALL-DAG:       sb $[[R1]], 2($[[PTR]])
+; ALL-DAG:       lbu $[[R1:[0-9]+]], 1($[[PTR]])
+; ALL-DAG:       sb $[[R1]], 3($[[PTR]])
+
+  %0 = load %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 0), align 1
+  store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 1), align 1
+  ret void
+}
+
+define void @copy_struct_S1() nounwind {
+entry:
+; ALL-LABEL: copy_struct_S1:
+
+; MIPS32-EL:     lw $[[PTR:[0-9]+]], %got(struct_s1)(
+; MIPS32-EB:     lw $[[PTR:[0-9]+]], %got(struct_s1)(
+; MIPS32-DAG:    lbu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32-DAG:    sb $[[R1]], 4($[[PTR]])
+; MIPS32-DAG:    lbu $[[R1:[0-9]+]], 1($[[PTR]])
+; MIPS32-DAG:    sb $[[R1]], 5($[[PTR]])
+; MIPS32-DAG:    lbu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS32-DAG:    sb $[[R1]], 6($[[PTR]])
+; MIPS32-DAG:    lbu $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS32-DAG:    sb $[[R1]], 7($[[PTR]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(struct_s1)(
+; MIPS32R6-DAG:  lhu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32R6-DAG:  sh $[[R1]], 4($[[PTR]])
+; MIPS32R6-DAG:  lhu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS32R6-DAG:  sh $[[R1]], 6($[[PTR]])
+
+; MIPS64-EL:     ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
+; MIPS64-EB:     ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
+; MIPS64-DAG:    lbu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64-DAG:    sb $[[R1]], 4($[[PTR]])
+; MIPS64-DAG:    lbu $[[R1:[0-9]+]], 1($[[PTR]])
+; MIPS64-DAG:    sb $[[R1]], 5($[[PTR]])
+; MIPS64-DAG:    lbu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS64-DAG:    sb $[[R1]], 6($[[PTR]])
+; MIPS64-DAG:    lbu $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS64-DAG:    sb $[[R1]], 7($[[PTR]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
+; MIPS64R6-DAG:  lhu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64R6-DAG:  sh $[[R1]], 4($[[PTR]])
+; MIPS64R6-DAG:  lhu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS64R6-DAG:  sh $[[R1]], 6($[[PTR]])
+
+  %0 = load %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 0), align 1
+  store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 1), align 1
+  ret void
+}
+
+define void @copy_struct_S2() nounwind {
+entry:
+; ALL-LABEL: copy_struct_S2:
+
+; MIPS32-EL:     lw $[[PTR:[0-9]+]], %got(struct_s2)(
+; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS32-EL-DAG: lwr $[[R1]],        0($[[PTR]])
+; MIPS32-EL-DAG: swl $[[R1]],       11($[[PTR]])
+; MIPS32-EL-DAG: swr $[[R1]],        8($[[PTR]])
+; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]])
+; MIPS32-EL-DAG: lwr $[[R1]],        4($[[PTR]])
+; MIPS32-EL-DAG: swl $[[R1]],       15($[[PTR]])
+; MIPS32-EL-DAG: swr $[[R1]],       12($[[PTR]])
+
+; MIPS32-EB:     lw $[[PTR:[0-9]+]], %got(struct_s2)(
+; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32-EB-DAG: lwr $[[R1]],        3($[[PTR]])
+; MIPS32-EB-DAG: swl $[[R1]],        8($[[PTR]])
+; MIPS32-EB-DAG: swr $[[R1]],       11($[[PTR]])
+; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS32-EB-DAG: lwr $[[R1]],        7($[[PTR]])
+; MIPS32-EB-DAG: swl $[[R1]],       12($[[PTR]])
+; MIPS32-EB-DAG: swr $[[R1]],       15($[[PTR]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(struct_s2)(
+; MIPS32R6-DAG:  lw $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32R6-DAG:  sw $[[R1]],        8($[[PTR]])
+; MIPS32R6-DAG:  lw $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS32R6-DAG:  sw $[[R1]],       12($[[PTR]])
+
+; MIPS64-EL:     ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
+; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS64-EL-DAG: lwr $[[R1]],        0($[[PTR]])
+; MIPS64-EL-DAG: swl $[[R1]],       11($[[PTR]])
+; MIPS64-EL-DAG: swr $[[R1]],        8($[[PTR]])
+; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]])
+; MIPS64-EL-DAG: lwr $[[R1]],        4($[[PTR]])
+; MIPS64-EL-DAG: swl $[[R1]],       15($[[PTR]])
+; MIPS64-EL-DAG: swr $[[R1]],       12($[[PTR]])
+
+; MIPS64-EB:     ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
+; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64-EB-DAG: lwr $[[R1]],        3($[[PTR]])
+; MIPS64-EB-DAG: swl $[[R1]],        8($[[PTR]])
+; MIPS64-EB-DAG: swr $[[R1]],       11($[[PTR]])
+; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS64-EB-DAG: lwr $[[R1]],        7($[[PTR]])
+; MIPS64-EB-DAG: swl $[[R1]],       12($[[PTR]])
+; MIPS64-EB-DAG: swr $[[R1]],       15($[[PTR]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
+; MIPS64R6-DAG:  lw $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64R6-DAG:  sw $[[R1]],        8($[[PTR]])
+; MIPS64R6-DAG:  lw $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS64R6-DAG:  sw $[[R1]],       12($[[PTR]])
+
+  %0 = load %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 0), align 1
+  store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 1), align 1
+  ret void
+}
+
+;
+; Arrays are simply concatenations of the members. They are unaffected by
+; endianness
+;
+
+@arr = common global [7 x i8] zeroinitializer, align 1
+
+define void @pass_array_byval() nounwind {
+entry:
+; ALL-LABEL: pass_array_byval:
+
+; MIPS32-EL:     lw $[[SPTR:[0-9]+]], %got(arr)(
+; MIPS32-EL-DAG: lwl $[[R1:4]], 3($[[PTR]])
+; MIPS32-EL-DAG: lwr $[[R1]],   0($[[PTR]])
+; MIPS32-EL-DAG: lbu $[[R2:[0-9]+]], 4($[[PTR]])
+; MIPS32-EL-DAG: lbu $[[R3:[0-9]+]], 5($[[PTR]])
+; MIPS32-EL-DAG: sll $[[T0:[0-9]+]], $[[R3]], 8
+; MIPS32-EL-DAG: or  $[[T1:[0-9]+]], $[[T0]], $[[R2]]
+; MIPS32-EL-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]])
+; MIPS32-EL-DAG: sll $[[T2:[0-9]+]], $[[R4]], 16
+; MIPS32-EL-DAG: or  $5, $[[T1]], $[[T2]]
+
+; MIPS32-EB:     lw $[[SPTR:[0-9]+]], %got(arr)(
+; MIPS32-EB-DAG: lwl $[[R1:4]], 0($[[PTR]])
+; MIPS32-EB-DAG: lwr $[[R1]],   3($[[PTR]])
+; MIPS32-EB-DAG: lbu $[[R2:[0-9]+]], 5($[[PTR]])
+; MIPS32-EB-DAG: lbu $[[R3:[0-9]+]], 4($[[PTR]])
+; MIPS32-EB-DAG: sll $[[T0:[0-9]+]], $[[R3]], 8
+; MIPS32-EB-DAG: or  $[[T1:[0-9]+]], $[[T0]], $[[R2]]
+; MIPS32-EB-DAG: sll $[[T1]], $[[T1]], 16
+; MIPS32-EB-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]])
+; MIPS32-EB-DAG: sll $[[T2:[0-9]+]], $[[R4]], 8
+; MIPS32-EB-DAG: or  $5, $[[T1]], $[[T2]]
+
+; MIPS32R6:        lw $[[SPTR:[0-9]+]], %got(arr)(
+; MIPS32R6-DAG:    lw $4, 0($[[PTR]])
+; MIPS32R6-EL-DAG: lhu $[[R2:[0-9]+]], 4($[[PTR]])
+; MIPS32R6-EL-DAG: lbu $[[R3:[0-9]+]], 6($[[PTR]])
+; MIPS32R6-EL-DAG: sll $[[T0:[0-9]+]], $[[R3]], 16
+; MIPS32R6-EL-DAG: or  $5, $[[R2]], $[[T0]]
+
+; MIPS32R6-EB-DAG: lhu $[[R2:[0-9]+]], 4($[[PTR]])
+; MIPS32R6-EB-DAG: lbu $[[R3:[0-9]+]], 6($[[PTR]])
+; MIPS32R6-EB-DAG: sll $[[T0:[0-9]+]], $[[R2]], 16
+; MIPS32R6-EB-DAG: or  $5, $[[T0]], $[[R3]]
+
+; MIPS64-EL:     ld $[[SPTR:[0-9]+]], %got_disp(arr)(
+; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS64-EL-DAG: lwr $[[R1]],   0($[[PTR]])
+
+; MIPS64-EB:     ld $[[SPTR:[0-9]+]], %got_disp(arr)(
+; MIPS64-EB-DAG: lwl  $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64-EB-DAG: lwr  $[[R1]],   3($[[PTR]])
+; MIPS64-EB-DAG: dsll $[[R1]], $[[R1]], 32
+; MIPS64-EB-DAG: lbu  $[[R2:[0-9]+]], 5($[[PTR]])
+; MIPS64-EB-DAG: lbu  $[[R3:[0-9]+]], 4($[[PTR]])
+; MIPS64-EB-DAG: dsll $[[T0:[0-9]+]], $[[R3]], 8
+; MIPS64-EB-DAG: or   $[[T1:[0-9]+]], $[[T0]], $[[R2]]
+; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16
+; MIPS64-EB-DAG: or   $[[T3:[0-9]+]], $[[R1]], $[[T1]]
+; MIPS64-EB-DAG: lbu  $[[R4:[0-9]+]], 6($[[PTR]])
+; MIPS64-EB-DAG: dsll $[[T4:[0-9]+]], $[[R4]], 8
+; MIPS64-EB-DAG: or   $4, $[[T3]], $[[T4]]
+
+; MIPS64R6:      ld $[[SPTR:[0-9]+]], %got_disp(arr)(
+
+  tail call void @extern_func([7 x i8]* byval @arr) nounwind
+  ret void
+}
+
+declare void @extern_func([7 x i8]* byval)
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index af192d0e92170..a403744c8fd5a 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -1,35 +1,163 @@
-; RUN: llc -march=mipsel -force-mips-long-branch -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=O32
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64  -force-mips-long-branch -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mipsel < %s | FileCheck %s
+; RUN: llc -march=mipsel -force-mips-long-branch -O3 < %s \
+; RUN:   | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 -force-mips-long-branch -O3 \
+; RUN:   < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -O3 \
+; RUN:   < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=micromips \
+; RUN:   -force-mips-long-branch -O3 < %s | FileCheck %s -check-prefix=MICROMIPS
+; RUN: llc -mtriple=mipsel-none-nacl -force-mips-long-branch -O3 < %s \
+; RUN:   | FileCheck %s -check-prefix=NACL
 
-@g0 = external global i32
 
-define void @foo1(i32 %s) nounwind {
+@x = external global i32
+
+define void @test1(i32 %s) {
 entry:
-; O32: nop
-; O32: addiu $sp, $sp, -8
-; O32: bal
-; O32: lui $1, 0
-; O32: addiu $1, $1, {{[0-9]+}} 
-; N64: nop
-; N64: daddiu $sp, $sp, -16
-; N64: lui $1, 0
-; N64: daddiu $1, $1, 0
-; N64: dsll $1, $1, 16
-; N64: daddiu $1, $1, 0
-; N64: bal
-; N64: dsll $1, $1, 16
-; N64: daddiu $1, $1, {{[0-9]+}}  
-
-  %tobool = icmp eq i32 %s, 0
-  br i1 %tobool, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %0 = load i32* @g0, align 4
-  %add = add nsw i32 %0, 12
-  store i32 %add, i32* @g0, align 4
-  br label %if.end
-
-if.end:                                           ; preds = %entry, %if.then
+  %cmp = icmp eq i32 %s, 0
+  br i1 %cmp, label %end, label %then
+
+then:
+  store i32 1, i32* @x, align 4
+  br label %end
+
+end:
   ret void
-}
 
+
+; First check the normal version (without long branch).  beqz jumps to return,
+; and fallthrough block stores 1 to global variable.
+
+; CHECK:        lui     $[[R0:[0-9]+]], %hi(_gp_disp)
+; CHECK:        addiu   $[[R0]], $[[R0]], %lo(_gp_disp)
+; CHECK:        beqz    $4, $[[BB0:BB[0-9_]+]]
+; CHECK:        addu    $[[GP:[0-9]+]], $[[R0]], $25
+; CHECK:        lw      $[[R1:[0-9]+]], %got(x)($[[GP]])
+; CHECK:        addiu   $[[R2:[0-9]+]], $zero, 1
+; CHECK:        sw      $[[R2]], 0($[[R1]])
+; CHECK:   $[[BB0]]:
+; CHECK:        jr      $ra
+; CHECK:        nop
+
+
+; Check the MIPS32 version.  Check that branch logic is inverted, so that the
+; target of the new branch (bnez) is the fallthrough block of the original
+; branch.  Check that fallthrough block of the new branch contains long branch
+; expansion which at the end indirectly jumps to the target of the original
+; branch.
+
+; O32:        lui     $[[R0:[0-9]+]], %hi(_gp_disp)
+; O32:        addiu   $[[R0]], $[[R0]], %lo(_gp_disp)
+; O32:        bnez    $4, $[[BB0:BB[0-9_]+]]
+; O32:        addu    $[[GP:[0-9]+]], $[[R0]], $25
+
+; Check for long branch expansion:
+; O32:             addiu   $sp, $sp, -8
+; O32-NEXT:        sw      $ra, 0($sp)
+; O32-NEXT:        lui     $1, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]]))
+; O32-NEXT:        bal     $[[BB1]]
+; O32-NEXT:        addiu   $1, $1, %lo(($[[BB2]])-($[[BB1]]))
+; O32-NEXT:   $[[BB1]]:
+; O32-NEXT:        addu    $1, $ra, $1
+; O32-NEXT:        lw      $ra, 0($sp)
+; O32-NEXT:        jr      $1
+; O32-NEXT:        addiu   $sp, $sp, 8
+
+; O32:   $[[BB0]]:
+; O32:        lw      $[[R1:[0-9]+]], %got(x)($[[GP]])
+; O32:        addiu   $[[R2:[0-9]+]], $zero, 1
+; O32:        sw      $[[R2]], 0($[[R1]])
+; O32:   $[[BB2]]:
+; O32:        jr      $ra
+; O32:        nop
+
+
+; Check the MIPS64 version.
+
+; N64:        lui     $[[R0:[0-9]+]], %hi(%neg(%gp_rel(test1)))
+; N64:        bnez    $4, $[[BB0:BB[0-9_]+]]
+; N64:        daddu   $[[R1:[0-9]+]], $[[R0]], $25
+
+; Check for long branch expansion:
+; N64:           daddiu  $sp, $sp, -16
+; N64-NEXT:      sd      $ra, 0($sp)
+; N64-NEXT:      daddiu  $1, $zero, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]]))
+; N64-NEXT:      dsll    $1, $1, 16
+; N64-NEXT:      bal     $[[BB1]]
+; N64-NEXT:      daddiu  $1, $1, %lo(($[[BB2]])-($[[BB1]]))
+; N64-NEXT:  $[[BB1]]:
+; N64-NEXT:      daddu   $1, $ra, $1
+; N64-NEXT:      ld      $ra, 0($sp)
+; N64-NEXT:      jr      $1
+; N64-NEXT:      daddiu  $sp, $sp, 16
+
+; N64:   $[[BB0]]:
+; N64:        daddiu  $[[GP:[0-9]+]], $[[R1]], %lo(%neg(%gp_rel(test1)))
+; N64:        ld      $[[R2:[0-9]+]], %got_disp(x)($[[GP]])
+; N64:        addiu   $[[R3:[0-9]+]], $zero, 1
+; N64:        sw      $[[R3]], 0($[[R2]])
+; N64:   $[[BB2]]:
+; N64:        jr      $ra
+; N64:        nop
+
+
+; Check the microMIPS version.
+
+; MICROMIPS:        lui     $[[R0:[0-9]+]], %hi(_gp_disp)
+; MICROMIPS:        addiu   $[[R0]], $[[R0]], %lo(_gp_disp)
+; MICROMIPS:        bnez    $4, $[[BB0:BB[0-9_]+]]
+; MICROMIPS:        addu    $[[GP:[0-9]+]], $[[R0]], $25
+
+; Check for long branch expansion:
+; MICROMIPS:          addiu   $sp, $sp, -8
+; MICROMIPS-NEXT:     sw      $ra, 0($sp)
+; MICROMIPS-NEXT:     lui     $1, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]]))
+; MICROMIPS-NEXT:     bal     $[[BB1]]
+; MICROMIPS-NEXT:     addiu   $1, $1, %lo(($[[BB2]])-($[[BB1]]))
+; MICROMIPS-NEXT:  $[[BB1]]:
+; MICROMIPS-NEXT:     addu    $1, $ra, $1
+; MICROMIPS-NEXT:     lw      $ra, 0($sp)
+; MICROMIPS-NEXT:     jr      $1
+; MICROMIPS-NEXT:     addiu   $sp, $sp, 8
+
+; MICROMIPS:   $[[BB0]]:
+; MICROMIPS:        lw      $[[R1:[0-9]+]], %got(x)($[[GP]])
+; MICROMIPS:        addiu   $[[R2:[0-9]+]], $zero, 1
+; MICROMIPS:        sw      $[[R2]], 0($[[R1]])
+; MICROMIPS:   $[[BB2]]:
+; MICROMIPS:        jr      $ra
+; MICROMIPS:        nop
+
+
+; Check the NaCl version.  Check that sp change is not in the branch delay slot
+; of "jr $1" instruction.  Check that target of indirect branch "jr $1" is
+; bundle aligned.
+
+; NACL:        lui     $[[R0:[0-9]+]], %hi(_gp_disp)
+; NACL:        addiu   $[[R0]], $[[R0]], %lo(_gp_disp)
+; NACL:        bnez    $4, $[[BB0:BB[0-9_]+]]
+; NACL:        addu    $[[GP:[0-9]+]], $[[R0]], $25
+
+; Check for long branch expansion:
+; NACL:             addiu   $sp, $sp, -8
+; NACL-NEXT:        sw      $ra, 0($sp)
+; NACL-NEXT:        lui     $1, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]]))
+; NACL-NEXT:        bal     $[[BB1]]
+; NACL-NEXT:        addiu   $1, $1, %lo(($[[BB2]])-($[[BB1]]))
+; NACL-NEXT:   $[[BB1]]:
+; NACL-NEXT:        addu    $1, $ra, $1
+; NACL-NEXT:        lw      $ra, 0($sp)
+; NACL-NEXT:        addiu   $sp, $sp, 8
+; NACL-NEXT:        jr      $1
+; NACL-NEXT:        nop
+
+; NACL:        $[[BB0]]:
+; NACL:             lw      $[[R1:[0-9]+]], %got(x)($[[GP]])
+; NACL:             addiu   $[[R2:[0-9]+]], $zero, 1
+; NACL:             sw      $[[R2]], 0($[[R1]])
+; NACL:             .align  4
+; NACL-NEXT:   $[[BB2]]:
+; NACL:             jr      $ra
+; NACL:             nop
+}
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
index 0dbb2c27b8f95..82229677ff11c 100644
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -1,9 +1,49 @@
-; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=32
-; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=DSP
+; RUN: llc -march=mips -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=32
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32
+; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R6
+; RUN: llc -march=mips -mcpu=mips32 -mattr=dsp < %s | FileCheck %s -check-prefix=DSP
+; RUN: llc -march=mips -mcpu=mips64   < %s | FileCheck %s -check-prefix=ALL -check-prefix=64
+; RUN: llc -march=mips -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64
+; RUN: llc -march=mips -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64R6
+
+; FIXME: The MIPS16 test should check its output
 ; RUN: llc -march=mips -mcpu=mips16 < %s
 
-; 32: madd ${{[0-9]+}}
-; DSP: madd $ac
+; ALL-LABEL: madd1:
+
+; 32-DAG:        sra $[[T0:[0-9]+]], $6, 31
+; 32-DAG:        mtlo $6
+; 32-DAG:        [[m:m]]add ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       sra $[[T0:[0-9]+]], $6, 31
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       madd $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $[[T1:[0-9]+]], $[[T0]], $6
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $[[T1]], $6
+; 32R6-DAG:      sra  $[[T3:[0-9]+]], $6, 31
+; 32R6-DAG:      addu $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+; 32R6-DAG:      muh  $[[T5:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $2, $[[T5]], $[[T4]]
+
+; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
+; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
+; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
+; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
+; 64-DAG:        sll $[[T3:[0-9]+]], $6, 0
+; 64-DAG:        daddu $2, $[[T2]], $[[T3]]
+
+; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
+; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
+; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+; 64R6-DAG:      sll $[[T3:[0-9]+]], $6, 0
+; 64R6-DAG:      daddu $2, $[[T2]], $[[T3]]
+
 define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = sext i32 %a to i64
@@ -14,8 +54,47 @@ entry:
   ret i64 %add
 }
 
-; 32: maddu ${{[0-9]+}}
-; DSP: maddu $ac
+; ALL-LABEL: madd2:
+
+; FIXME: We don't really need this instruction
+; 32-DAG:        addiu $[[T0:[0-9]+]], $zero, 0
+; 32-DAG:        mtlo $6
+; 32-DAG:        [[m:m]]addu ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       addiu $[[T0:[0-9]+]], $zero, 0
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       maddu $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $[[T1:[0-9]+]], $[[T0]], $6
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $[[T1]], $6
+; FIXME: There's a redundant move here. We should remove it
+; 32R6-DAG:      muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $2, $[[T3]], $[[T2]]
+
+; 64-DAG:        dsll $[[T0:[0-9]+]], $4, 32
+; 64-DAG:        dsrl $[[T1:[0-9]+]], $[[T0]], 32
+; 64-DAG:        dsll $[[T2:[0-9]+]], $5, 32
+; 64-DAG:        dsrl $[[T3:[0-9]+]], $[[T2]], 32
+; 64-DAG:        d[[m:m]]ult $[[T3]], $[[T1]]
+; 64-DAG:        [[m]]flo $[[T4:[0-9]+]]
+; 64-DAG:        dsll $[[T5:[0-9]+]], $6, 32
+; 64-DAG:        dsrl $[[T6:[0-9]+]], $[[T5]], 32
+; 64-DAG:        daddu $2, $[[T4]], $[[T6]]
+
+; 64R6-DAG:      dsll $[[T0:[0-9]+]], $4, 32
+; 64R6-DAG:      dsrl $[[T1:[0-9]+]], $[[T0]], 32
+; 64R6-DAG:      dsll $[[T2:[0-9]+]], $5, 32
+; 64R6-DAG:      dsrl $[[T3:[0-9]+]], $[[T2]], 32
+; 64R6-DAG:      dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
+; 64R6-DAG:      dsll $[[T5:[0-9]+]], $6, 32
+; 64R6-DAG:      dsrl $[[T6:[0-9]+]], $[[T5]], 32
+; 64R6-DAG:      daddu $2, $[[T4]], $[[T6]]
+
 define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = zext i32 %a to i64
@@ -26,8 +105,38 @@ entry:
   ret i64 %add
 }
 
-; 32: madd ${{[0-9]+}}
-; DSP: madd $ac
+; ALL-LABEL: madd3:
+
+; 32-DAG:        mthi $6
+; 32-DAG:        mtlo $7
+; 32-DAG:        [[m:m]]add ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       mthi $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       mtlo $[[AC]], $7
+; DSP-DAG:       madd $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $[[T1:[0-9]+]], $[[T0]], $7
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $[[T1]], $7
+; 32R6-DAG:      addu $[[T4:[0-9]+]], $[[T2]], $6
+; 32R6-DAG:      muh  $[[T5:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $2, $[[T5]], $[[T4]]
+
+; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
+; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
+; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
+; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
+; 64-DAG:        daddu $2, $[[T2]], $6
+
+; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
+; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
+; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+; 64R6-DAG:      daddu $2, $[[T2]], $6
+
 define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone {
 entry:
   %conv = sext i32 %a to i64
@@ -37,8 +146,41 @@ entry:
   ret i64 %add
 }
 
-; 32: msub ${{[0-9]+}}
-; DSP: msub $ac
+; ALL-LABEL: msub1:
+
+; 32-DAG:        sra $[[T0:[0-9]+]], $6, 31
+; 32-DAG:        mtlo $6
+; 32-DAG:        [[m:m]]sub ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       sra $[[T0:[0-9]+]], $6, 31
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       msub $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      muh  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      mul  $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      sltu $[[T3:[0-9]+]], $6, $[[T1]]
+; 32R6-DAG:      addu $[[T4:[0-9]+]], $[[T3]], $[[T0]]
+; 32R6-DAG:      sra  $[[T5:[0-9]+]], $6, 31
+; 32R6-DAG:      subu $2, $[[T5]], $[[T4]]
+; 32R6-DAG:      subu $3, $6, $[[T1]]
+
+; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
+; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
+; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
+; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
+; 64-DAG:        sll $[[T3:[0-9]+]], $6, 0
+; 64-DAG:        dsubu $2, $[[T3]], $[[T2]]
+
+; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
+; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
+; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+; 64R6-DAG:      sll $[[T3:[0-9]+]], $6, 0
+; 64R6-DAG:      dsubu $2, $[[T3]], $[[T2]]
+
 define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = sext i32 %c to i64
@@ -49,8 +191,48 @@ entry:
   ret i64 %sub
 }
 
-; 32: msubu ${{[0-9]+}}
-; DSP: msubu $ac
+; ALL-LABEL: msub2:
+
+; FIXME: We don't really need this instruction
+; 32-DAG:        addiu $[[T0:[0-9]+]], $zero, 0
+; 32-DAG:        mtlo $6
+; 32-DAG:        [[m:m]]subu ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       addiu $[[T0:[0-9]+]], $zero, 0
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       msubu $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      muhu $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}}
+
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $6, $[[T1]]
+; 32R6-DAG:      addu $[[T3:[0-9]+]], $[[T2]], $[[T0]]
+; 32R6-DAG:      negu $2, $[[T3]]
+; 32R6-DAG:      subu $3, $6, $[[T1]]
+
+; 64-DAG:        dsll $[[T0:[0-9]+]], $4, 32
+; 64-DAG:        dsrl $[[T1:[0-9]+]], $[[T0]], 32
+; 64-DAG:        dsll $[[T2:[0-9]+]], $5, 32
+; 64-DAG:        dsrl $[[T3:[0-9]+]], $[[T2]], 32
+; 64-DAG:        d[[m:m]]ult $[[T3]], $[[T1]]
+; 64-DAG:        [[m]]flo $[[T4:[0-9]+]]
+; 64-DAG:        dsll $[[T5:[0-9]+]], $6, 32
+; 64-DAG:        dsrl $[[T6:[0-9]+]], $[[T5]], 32
+; 64-DAG:        dsubu $2, $[[T6]], $[[T4]]
+
+; 64R6-DAG:      dsll $[[T0:[0-9]+]], $4, 32
+; 64R6-DAG:      dsrl $[[T1:[0-9]+]], $[[T0]], 32
+; 64R6-DAG:      dsll $[[T2:[0-9]+]], $5, 32
+; 64R6-DAG:      dsrl $[[T3:[0-9]+]], $[[T2]], 32
+; 64R6-DAG:      dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
+; 64R6-DAG:      dsll $[[T5:[0-9]+]], $6, 32
+; 64R6-DAG:      dsrl $[[T6:[0-9]+]], $[[T5]], 32
+; 64R6-DAG:      dsubu $2, $[[T6]], $[[T4]]
+
 define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = zext i32 %c to i64
@@ -61,8 +243,39 @@ entry:
   ret i64 %sub
 }
 
-; 32: msub ${{[0-9]+}}
-; DSP: msub $ac
+; ALL-LABEL: msub3:
+
+; FIXME: We don't really need this instruction
+; 32-DAG:        mthi $6
+; 32-DAG:        mtlo $7
+; 32-DAG:        [[m:m]]sub ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       addiu $[[T0:[0-9]+]], $zero, 0
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       msub $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      muh $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $7, $[[T1]]
+; 32R6-DAG:      addu $[[T3:[0-9]+]], $[[T2]], $[[T0]]
+; 32R6-DAG:      subu $2, $6, $[[T3]]
+; 32R6-DAG:      subu $3, $7, $[[T1]]
+
+; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
+; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
+; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
+; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
+; 64-DAG:        dsubu $2, $6, $[[T2]]
+
+; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
+; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
+; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+; 64R6-DAG:      dsubu $2, $6, $[[T2]]
+
 define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone {
 entry:
   %conv = sext i32 %a to i64
diff --git a/test/CodeGen/Mips/mature-mc-support.ll b/test/CodeGen/Mips/mature-mc-support.ll
new file mode 100644
index 0000000000000..6e5998d8a7cb7
--- /dev/null
+++ b/test/CodeGen/Mips/mature-mc-support.ll
@@ -0,0 +1,32 @@
+; Test that inline assembly is parsed by the MC layer when MC support is mature
+; (even when the output is assembly).
+; FIXME: Mips doesn't use the integrated assembler by default so we only test
+; that -filetype=obj tries to parse the assembly.
+
+; SKIP: not llc -march=mips < %s > /dev/null 2> %t1
+; SKIP: FileCheck %s < %t1
+
+; RUN: not llc -march=mips -filetype=obj < %s > /dev/null 2> %t2
+; RUN: FileCheck %s < %t2
+
+; SKIP: not llc -march=mipsel < %s > /dev/null 2> %t3
+; SKIP: FileCheck %s < %t3
+
+; RUN: not llc -march=mipsel -filetype=obj < %s > /dev/null 2> %t4
+; RUN: FileCheck %s < %t4
+
+; SKIP: not llc -march=mips64 < %s > /dev/null 2> %t5
+; SKIP: FileCheck %s < %t5
+
+; RUN: not llc -march=mips64 -filetype=obj < %s > /dev/null 2> %t6
+; RUN: FileCheck %s < %t6
+
+; SKIP: not llc -march=mips64el < %s > /dev/null 2> %t7
+; SKIP: FileCheck %s < %t7
+
+; RUN: not llc -march=mips64el -filetype=obj < %s > /dev/null 2> %t8
+; RUN: FileCheck %s < %t8
+
+module asm "	.this_directive_is_very_unlikely_to_exist"
+
+; CHECK: LLVM ERROR: Error parsing inline asm
diff --git a/test/CodeGen/Mips/mbrsize4a.ll b/test/CodeGen/Mips/mbrsize4a.ll
new file mode 100644
index 0000000000000..c80299166ab46
--- /dev/null
+++ b/test/CodeGen/Mips/mbrsize4a.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands   < %s | FileCheck %s -check-prefix=jal16
+
+@j = global i32 10, align 4
+@.str = private unnamed_addr constant [11 x i8] c"at bottom\0A\00", align 1
+@i = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  br label %z
+
+z:                                                ; preds = %y, %entry
+  %call = call i32 bitcast (i32 (...)* @foo to i32 ()*)()
+  call void asm sideeffect ".space 10000000", ""() #2, !srcloc !1
+  br label %y
+
+y:                                                ; preds = %z
+  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0))
+  br label %z
+
+return:                                           ; No predecessors!
+  %0 = load i32* %retval
+  ret i32 %0
+; jal16: 	jal	$BB{{[0-9]+}}_{{[0-9]+}}
+}
+
+declare i32 @foo(...) #1
+
+declare i32 @printf(i8*, ...) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!1 = metadata !{i32 68}
diff --git a/test/CodeGen/Mips/micromips-atomic.ll b/test/CodeGen/Mips/micromips-atomic.ll
new file mode 100644
index 0000000000000..a50e0b7850c3f
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-atomic.ll
@@ -0,0 +1,18 @@
+; RUN: llc %s -march=mipsel -mcpu=mips32r2 -mattr=micromips -filetype=asm \
+; RUN: -relocation-model=pic -o - | FileCheck %s
+
+@x = common global i32 0, align 4
+
+define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
+entry:
+  %0 = atomicrmw add i32* @x, i32 %incr monotonic
+  ret i32 %0
+
+; CHECK-LABEL:   AtomicLoadAdd32:
+; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
+; CHECK:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK:   addu    $[[R2:[0-9]+]], $[[R1]], $4
+; CHECK:   sc      $[[R2]], 0($[[R0]])
+; CHECK:   beqz    $[[R2]], $[[BB0]]
+}
diff --git a/test/CodeGen/Mips/micromips-directives.ll b/test/CodeGen/Mips/micromips-directives.ll
new file mode 100644
index 0000000000000..dd0bd5836d28b
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-directives.ll
@@ -0,0 +1,16 @@
+; This test checks if the '.set [no]micromips' directives
+; are emitted before a function's entry label.
+
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | \
+; RUN:   FileCheck %s -check-prefix=CHECK-MM
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=-micromips %s -o - | \
+; RUN:   FileCheck %s -check-prefix=CHECK-NO-MM
+
+define i32 @main() nounwind {
+entry:
+  ret i32 0
+}
+
+; CHECK-MM: .set micromips
+; CHECK-NO-MM: .set nomicromips
+; CHECK: main:
diff --git a/test/CodeGen/Mips/micromips-jal.ll b/test/CodeGen/Mips/micromips-jal.ll
new file mode 100644
index 0000000000000..fccc229197288
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-jal.ll
@@ -0,0 +1,48 @@
+; RUN: llc %s -march=mipsel -mcpu=mips32r2 -mattr=micromips -filetype=asm \
+; RUN:   -relocation-model=static -o - | FileCheck %s
+
+define i32 @sum(i32 %a, i32 %b) nounwind uwtable {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  %0 = load i32* %a.addr, align 4
+  %1 = load i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %z = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* %y, align 4
+  %1 = load i32* %z, align 4
+  %call = call i32 @sum(i32 %0, i32 %1)
+  store i32 %call, i32* %x, align 4
+  %2 = load i32* %x, align 4
+  ret i32 %2
+}
+
+; CHECK:    .text
+
+; CHECK:    .globl  sum
+; CHECK:    .type sum,@function
+; CHECK:    .set  micromips
+; CHECK:    .ent  sum
+; CHECK-LABEL: sum:
+; CHECK:    .end  sum
+
+; CHECK:    .globl  main
+; CHECK:    .type main,@function
+; CHECK:    .set  micromips
+; CHECK:    .ent  main
+; CHECK-LABEL: main:
+
+; CHECK:    jal sum
+
+; CHECK:    .end main
diff --git a/test/CodeGen/Mips/micromips-load-effective-address.ll b/test/CodeGen/Mips/micromips-load-effective-address.ll
new file mode 100644
index 0000000000000..afba760f0e629
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-load-effective-address.ll
@@ -0,0 +1,29 @@
+; RUN: llc %s -march=mipsel -mattr=micromips -filetype=asm \
+; RUN: -relocation-model=pic -O3 -o - | FileCheck %s
+
+define i32 @sum(i32* %x, i32* %y) nounwind uwtable {
+entry:
+  %x.addr = alloca i32*, align 8
+  %y.addr = alloca i32*, align 8
+  store i32* %x, i32** %x.addr, align 8
+  store i32* %y, i32** %y.addr, align 8
+  %0 = load i32** %x.addr, align 8
+  %1 = load i32* %0, align 4
+  %2 = load i32** %y.addr, align 8
+  %3 = load i32* %2, align 4
+  %add = add nsw i32 %1, %3
+  ret i32 %add
+}
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 @sum(i32* %x, i32* %y)
+  ret i32 %call
+}
+
+; CHECK: addiu ${{[0-9]+}}, $sp, {{[0-9]+}}
+; CHECK: addiu ${{[0-9]+}}, $sp, {{[0-9]+}}
diff --git a/test/CodeGen/Mips/mips16-hf-attr.ll b/test/CodeGen/Mips/mips16-hf-attr.ll
new file mode 100644
index 0000000000000..d9ad6295bef8b
--- /dev/null
+++ b/test/CodeGen/Mips/mips16-hf-attr.ll
@@ -0,0 +1,45 @@
+; Check that stubs generation for mips16 hard-float mode does not depend
+; on the function 'use-soft-float' attribute's value.
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel \
+; RUN:     -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s
+
+define void @bar_sf() #0 {
+; CHECK: bar_sf:
+entry:
+  %call1 = call float @foo(float 1.000000e+00)
+; CHECK: lw $2, %call16(foo)($3)
+; CHECK: lw $5, %got(__mips16_call_stub_sf_1)($3)
+  ret void
+}
+
+define void @bar_hf() #1 {
+; CHECK: bar_hf:
+entry:
+  %call1 = call float @foo(float 1.000000e+00)
+; CHECK: lw $2, %call16(foo)($3)
+; CHECK: lw $5, %got(__mips16_call_stub_sf_1)($3)
+  ret void
+}
+
+declare float @foo(float) #2
+
+attributes #0 = {
+  nounwind
+  "less-precise-fpmad"="false" "no-frame-pointer-elim"="true"
+  "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false"
+  "no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
+  "unsafe-fp-math"="false" "use-soft-float"="false"
+}
+attributes #1 = {
+  nounwind
+  "less-precise-fpmad"="false" "no-frame-pointer-elim"="true"
+  "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false"
+  "no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
+  "unsafe-fp-math"="false" "use-soft-float"="true"
+}
+attributes #2 = {
+  "less-precise-fpmad"="false" "no-frame-pointer-elim"="true"
+  "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false"
+  "no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
+  "unsafe-fp-math"="false" "use-soft-float"="true"
+}
diff --git a/test/CodeGen/Mips/mips16_32_1.ll b/test/CodeGen/Mips/mips16_32_1.ll
index e156641d4e508..f6096b402f2d7 100644
--- a/test/CodeGen/Mips/mips16_32_1.ll
+++ b/test/CodeGen/Mips/mips16_32_1.ll
@@ -6,9 +6,8 @@ entry:
   ret void
 }
 
-; CHECK: 	.set	mips16                  # @foo
+; CHECK: 	.set	mips16
 ; CHECK:	.ent	foo
-; CHECK:	save	{{.+}}
-; CHECK:	restore	{{.+}} 
+; CHECK:	jrc $ra
 ; CHECK:	.end	foo
 attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_10.ll b/test/CodeGen/Mips/mips16_32_10.ll
index 7c017b8e4b755..ff9831ed76226 100644
--- a/test/CodeGen/Mips/mips16_32_10.ll
+++ b/test/CodeGen/Mips/mips16_32_10.ll
@@ -4,7 +4,7 @@ define void @foo() #0 {
 entry:
   ret void
 }
-; 16: 	.set	nomips16                  # @foo
+; 16: 	.set	nomips16
 ; 16: 	.ent	foo
 ; 16:	.set	noreorder
 ; 16:	.set	nomacro
@@ -21,11 +21,10 @@ entry:
   ret void
 }
 
-; 16: 	.set	mips16                  # @nofoo
+; 16: 	.set	mips16
 ; 16: 	.ent	nofoo
 
-; 16:	save	{{.+}}
-; 16:	restore	{{.+}} 
+; 16:	jrc $ra
 ; 16:	.end	nofoo
 
 define i32 @main() #2 {
@@ -33,7 +32,7 @@ entry:
   ret i32 0
 }
 
-; 16: 	.set	nomips16                  # @main
+; 16: 	.set	nomips16
 ; 16: 	.ent	main
 ; 16:	.set	noreorder
 ; 16:	.set	nomacro
diff --git a/test/CodeGen/Mips/mips16_32_3.ll b/test/CodeGen/Mips/mips16_32_3.ll
index dd94ec1ce80af..c5a29a0b8fdbf 100644
--- a/test/CodeGen/Mips/mips16_32_3.ll
+++ b/test/CodeGen/Mips/mips16_32_3.ll
@@ -6,22 +6,20 @@ entry:
   ret void
 }
 
-; 16: 	.set	mips16                  # @foo
+; 16: 	.set	mips16
 ; 16: 	.ent	foo
-; 16:	save	{{.+}}
-; 16:	restore	{{.+}} 
+; 16:	jrc $ra
 ; 16:	.end	foo
-; 32: 	.set	mips16                  # @foo
+; 32: 	.set	mips16
 ; 32: 	.ent	foo
-; 32:	save	{{.+}}
-; 32:	restore	{{.+}} 
+; 32:	jrc $ra
 ; 32:	.end	foo
 define void @nofoo() #1 {
 entry:
   ret void
 }
 
-; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.set	nomips16
 ; 16: 	.ent	nofoo
 ; 16:	.set	noreorder
 ; 16:	.set	nomacro
@@ -32,7 +30,7 @@ entry:
 ; 16:	.set	macro
 ; 16:	.set	reorder
 ; 16:	.end	nofoo
-; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.set	nomips16
 ; 32: 	.ent	nofoo
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -48,12 +46,11 @@ entry:
   ret i32 0
 }
 
-; 16: 	.set	mips16                  # @main
+; 16: 	.set	mips16
 ; 16: 	.ent	main
-; 16:	save	{{.+}}
-; 16:	restore	{{.+}} 
+; 16:	jrc $ra
 ; 16:	.end	main
-; 32: 	.set	nomips16                  # @main
+; 32: 	.set	nomips16
 ; 32: 	.ent	main
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
diff --git a/test/CodeGen/Mips/mips16_32_4.ll b/test/CodeGen/Mips/mips16_32_4.ll
index 5e49071394458..1238363d907e0 100644
--- a/test/CodeGen/Mips/mips16_32_4.ll
+++ b/test/CodeGen/Mips/mips16_32_4.ll
@@ -6,22 +6,20 @@ entry:
   ret void
 }
 
-; 16: 	.set	mips16                  # @foo
+; 16: 	.set	mips16
 ; 16: 	.ent	foo
-; 16:	save	{{.+}}
-; 16:	restore	{{.+}} 
+; 16:	jrc $ra
 ; 16:	.end	foo
-; 32: 	.set	mips16                  # @foo
+; 32: 	.set	mips16
 ; 32: 	.ent	foo
-; 32:	save	{{.+}}
-; 32:	restore	{{.+}} 
+; 32:	jrc $ra
 ; 32:	.end	foo
 define void @nofoo() #1 {
 entry:
   ret void
 }
 
-; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.set	nomips16
 ; 16: 	.ent	nofoo
 ; 16:	.set	noreorder
 ; 16:	.set	nomacro
@@ -32,7 +30,7 @@ entry:
 ; 16:	.set	macro
 ; 16:	.set	reorder
 ; 16:	.end	nofoo
-; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.set	nomips16
 ; 32: 	.ent	nofoo
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -48,15 +46,13 @@ entry:
   ret i32 0
 }
 
-; 16: 	.set	mips16                  # @main
+; 16: 	.set	mips16
 ; 16: 	.ent	main
-; 16:	save	{{.+}}
-; 16:	restore	{{.+}} 
+; 16:	jrc $ra
 ; 16:	.end	main
-; 32: 	.set	mips16                  # @main
+; 32: 	.set	mips16
 ; 32: 	.ent	main
-; 32:	save	{{.+}}
-; 32:	restore	{{.+}} 
+; 32:	jrc $ra
 ; 32:	.end	main
 
 
diff --git a/test/CodeGen/Mips/mips16_32_5.ll b/test/CodeGen/Mips/mips16_32_5.ll
index 17900a2dc75fb..5d4c8a1af5630 100644
--- a/test/CodeGen/Mips/mips16_32_5.ll
+++ b/test/CodeGen/Mips/mips16_32_5.ll
@@ -6,22 +6,20 @@ entry:
   ret void
 }
 
-; 16: 	.set	mips16                  # @foo
+; 16: 	.set	mips16
 ; 16: 	.ent	foo
-; 16:	save	{{.+}}
-; 16:	restore	{{.+}} 
+; 16:	jrc $ra
 ; 16:	.end	foo
-; 32: 	.set	mips16                  # @foo
+; 32: 	.set	mips16
 ; 32: 	.ent	foo
-; 32:	save	{{.+}}
-; 32:	restore	{{.+}} 
+; 32:	jrc $ra
 ; 32:	.end	foo
 define void @nofoo() #1 {
 entry:
   ret void
 }
 
-; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.set	nomips16
 ; 16: 	.ent	nofoo
 ; 16:	.set	noreorder
 ; 16:	.set	nomacro
@@ -32,7 +30,7 @@ entry:
 ; 16:	.set	macro
 ; 16:	.set	reorder
 ; 16:	.end	nofoo
-; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.set	nomips16
 ; 32: 	.ent	nofoo
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -48,7 +46,7 @@ entry:
   ret i32 0
 }
 
-; 16: 	.set	nomips16                  # @main
+; 16: 	.set	nomips16
 ; 16: 	.ent	main
 ; 16:	.set	noreorder
 ; 16:	.set	nomacro
@@ -60,7 +58,7 @@ entry:
 ; 16:	.set	reorder
 ; 16:	.end	main
 
-; 32: 	.set	nomips16                  # @main
+; 32: 	.set	nomips16
 ; 32: 	.ent	main
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
diff --git a/test/CodeGen/Mips/mips16_32_6.ll b/test/CodeGen/Mips/mips16_32_6.ll
index a77031af8be6d..63323b608bc57 100644
--- a/test/CodeGen/Mips/mips16_32_6.ll
+++ b/test/CodeGen/Mips/mips16_32_6.ll
@@ -6,12 +6,11 @@ entry:
   ret void
 }
 
-; 16: 	.set	mips16                  # @foo
+; 16: 	.set	mips16
 ; 16: 	.ent	foo
-; 16:	save	{{.+}}
-; 16:	restore	{{.+}} 
+; 16:	jrc $ra
 ; 16:	.end	foo
-; 32: 	.set	nomips16                  # @foo
+; 32: 	.set	nomips16
 ; 32: 	.ent	foo
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -27,7 +26,7 @@ entry:
   ret void
 }
 
-; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.set	nomips16
 ; 16: 	.ent	nofoo
 ; 16:	.set	noreorder
 ; 16:	.set	nomacro
@@ -38,7 +37,7 @@ entry:
 ; 16:	.set	macro
 ; 16:	.set	reorder
 ; 16:	.end	nofoo
-; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.set	nomips16
 ; 32: 	.ent	nofoo
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -54,7 +53,7 @@ entry:
   ret i32 0
 }
 
-; 16: 	.set	nomips16                  # @main
+; 16: 	.set	nomips16
 ; 16: 	.ent	main
 ; 16:	.set	noreorder
 ; 16:	.set	nomacro
@@ -66,7 +65,7 @@ entry:
 ; 16:	.set	reorder
 ; 16:	.end	main
 
-; 32: 	.set	nomips16                  # @main
+; 32: 	.set	nomips16
 ; 32: 	.ent	main
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
diff --git a/test/CodeGen/Mips/mips16_32_7.ll b/test/CodeGen/Mips/mips16_32_7.ll
index 895b5d4346a81..480a23c8b25ea 100644
--- a/test/CodeGen/Mips/mips16_32_7.ll
+++ b/test/CodeGen/Mips/mips16_32_7.ll
@@ -6,12 +6,11 @@ entry:
   ret void
 }
 
-; 16: 	.set	mips16                  # @foo
+; 16: 	.set	mips16
 ; 16: 	.ent	foo
-; 16:	save	{{.+}}
-; 16:	restore	{{.+}} 
+; 16:	jrc $ra
 ; 16:	.end	foo
-; 32: 	.set	nomips16                  # @foo
+; 32: 	.set	nomips16
 ; 32: 	.ent	foo
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -27,7 +26,7 @@ entry:
   ret void
 }
 
-; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.set	nomips16
 ; 16: 	.ent	nofoo
 ; 16:	.set	noreorder
 ; 16:	.set	nomacro
@@ -38,7 +37,7 @@ entry:
 ; 16:	.set	macro
 ; 16:	.set	reorder
 ; 16:	.end	nofoo
-; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.set	nomips16
 ; 32: 	.ent	nofoo
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -54,16 +53,14 @@ entry:
   ret i32 0
 }
 
-; 16: 	.set	mips16                  # @main
+; 16: 	.set	mips16
 ; 16: 	.ent	main
-; 16:	save	{{.+}}
-; 16:	restore	{{.+}} 
+; 16:	jrc $ra
 ; 16:	.end	main
 
-; 32: 	.set	mips16                  # @main
+; 32: 	.set	mips16
 ; 32: 	.ent	main
-; 32:	save	{{.+}}
-; 32:	restore	{{.+}} 
+; 32:	jrc $ra
 ; 32:	.end	main
 
 
diff --git a/test/CodeGen/Mips/mips16_32_8.ll b/test/CodeGen/Mips/mips16_32_8.ll
index 4152d687093e9..2f5bc219cf35e 100644
--- a/test/CodeGen/Mips/mips16_32_8.ll
+++ b/test/CodeGen/Mips/mips16_32_8.ll
@@ -14,10 +14,9 @@ entry:
   ret void
 }
 
-; 32: 	.set	mips16                  # @foo
+; 32: 	.set	mips16
 ; 32: 	.ent	foo
-; 32:	save	{{.+}}
-; 32:	restore	{{.+}} 
+; 32:	jrc $ra
 ; 32:	.end	foo
 
 define void @nofoo() #1 {
@@ -33,7 +32,7 @@ entry:
   ret void
 }
 
-; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.set	nomips16
 ; 32: 	.ent	nofoo
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -57,7 +56,7 @@ entry:
   ret i32 0
 }
 
-; 32: 	.set	nomips16                  # @main
+; 32: 	.set	nomips16
 ; 32: 	.ent	main
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
diff --git a/test/CodeGen/Mips/mips16_32_9.ll b/test/CodeGen/Mips/mips16_32_9.ll
index c9b494f2a8904..8543147bed03c 100644
--- a/test/CodeGen/Mips/mips16_32_9.ll
+++ b/test/CodeGen/Mips/mips16_32_9.ll
@@ -5,17 +5,16 @@ entry:
   ret void
 }
 
-; 32: 	.set	mips16                  # @foo
+; 32: 	.set	mips16
 ; 32: 	.ent	foo
-; 32:	save	{{.+}}
-; 32:	restore	{{.+}} 
+; 32:	jrc $ra
 ; 32:	.end	foo
 define void @nofoo() #1 {
 entry:
   ret void
 }
 
-; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.set	nomips16
 ; 32: 	.ent	nofoo
 ; 32:	.set	noreorder
 ; 32:	.set	nomacro
@@ -31,10 +30,9 @@ entry:
   ret i32 0
 }
 
-; 32: 	.set	mips16                  # @main
+; 32: 	.set	mips16
 ; 32: 	.ent	main
-; 32:	save	{{.+}}
-; 32:	restore	{{.+}} 
+; 32:	jrc $ra
 ; 32:	.end	main
 
 
diff --git a/test/CodeGen/Mips/mips16_fpret.ll b/test/CodeGen/Mips/mips16_fpret.ll
index c132f63cfb016..fe87604d61073 100644
--- a/test/CodeGen/Mips/mips16_fpret.ll
+++ b/test/CodeGen/Mips/mips16_fpret.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=2
-; RUN: llc -mtriple=mipsel-linux-gnu  -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=3
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=4
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=2
+; RUN: llc -mtriple=mipsel-linux-gnu  -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=3
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=4
 
 
 @x = global float 0x41F487E980000000, align 4
diff --git a/test/CodeGen/Mips/mips16ex.ll b/test/CodeGen/Mips/mips16ex.ll
index ecb30b5c63b84..a1a99191595d0 100644
--- a/test/CodeGen/Mips/mips16ex.ll
+++ b/test/CodeGen/Mips/mips16ex.ll
@@ -1,6 +1,8 @@
 ; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
 
-;16: $eh_func_begin0=.
+;16: .cfi_personality
+;16-NEXT: [[TMP:.*]]:
+;16-NEXT: $eh_func_begin0 = ([[TMP]])
 @.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1
 @_ZTIi = external constant i8*
 @.str1 = private unnamed_addr constant [15 x i8] c"exception %i \0A\00", align 1
diff --git a/test/CodeGen/Mips/mips16fpe.ll b/test/CodeGen/Mips/mips16fpe.ll
index 10c5163f7fd0a..987980e080ff4 100644
--- a/test/CodeGen/Mips/mips16fpe.ll
+++ b/test/CodeGen/Mips/mips16fpe.ll
@@ -1,4 +1,4 @@
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=16hf
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16hf
 
 @x = global float 5.000000e+00, align 4
 @y = global float 1.500000e+01, align 4
diff --git a/test/CodeGen/Mips/mips32r6/compatibility.ll b/test/CodeGen/Mips/mips32r6/compatibility.ll
new file mode 100644
index 0000000000000..8eac8d4683d15
--- /dev/null
+++ b/test/CodeGen/Mips/mips32r6/compatibility.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck %s
+; RUN: not llc -march=mipsel -mcpu=mips32r6 -mattr=+dsp < %s 2>&1 | FileCheck --check-prefix=DSP %s
+
+; CHECK: foo:
+; DSP: MIPS32r6 is not compatible with the DSP ASE
+
+define void @foo() nounwind {
+  ret void
+}
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
index dc8bbfdd5bafd..7f7d515d690eb 100644
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -1,5 +1,11 @@
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips4 -soft-float -O1 \
+; RUN:     -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=C_CC_FMT
 ; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64 -soft-float -O1 \
-; RUN: -disable-mips-delay-filler < %s | FileCheck %s
+; RUN:     -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=C_CC_FMT
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64r2 -soft-float -O1 \
+; RUN:     -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=C_CC_FMT
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64r6 -soft-float -O1 \
+; RUN:     -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=CMP_CC_FMT
 
 @gld0 = external global fp128
 @gld1 = external global fp128
@@ -7,8 +13,8 @@
 @gf1 = external global float
 @gd1 = external global double
 
-; CHECK-LABEL: addLD:
-; CHECK: ld $25, %call16(__addtf3)
+; ALL-LABEL: addLD:
+; ALL: ld $25, %call16(__addtf3)
 
 define fp128 @addLD() {
 entry:
@@ -18,8 +24,8 @@ entry:
   ret fp128 %add
 }
 
-; CHECK-LABEL: subLD:
-; CHECK: ld $25, %call16(__subtf3)
+; ALL-LABEL: subLD:
+; ALL: ld $25, %call16(__subtf3)
 
 define fp128 @subLD() {
 entry:
@@ -29,8 +35,8 @@ entry:
   ret fp128 %sub
 }
 
-; CHECK-LABEL: mulLD:
-; CHECK: ld $25, %call16(__multf3)
+; ALL-LABEL: mulLD:
+; ALL: ld $25, %call16(__multf3)
 
 define fp128 @mulLD() {
 entry:
@@ -40,8 +46,8 @@ entry:
   ret fp128 %mul
 }
 
-; CHECK-LABEL: divLD:
-; CHECK: ld $25, %call16(__divtf3)
+; ALL-LABEL: divLD:
+; ALL: ld $25, %call16(__divtf3)
 
 define fp128 @divLD() {
 entry:
@@ -51,8 +57,8 @@ entry:
   ret fp128 %div
 }
 
-; CHECK-LABEL: conv_LD_char:
-; CHECK: ld $25, %call16(__floatsitf)
+; ALL-LABEL: conv_LD_char:
+; ALL: ld $25, %call16(__floatsitf)
 
 define fp128 @conv_LD_char(i8 signext %a) {
 entry:
@@ -60,8 +66,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_short:
-; CHECK: ld $25, %call16(__floatsitf)
+; ALL-LABEL: conv_LD_short:
+; ALL: ld $25, %call16(__floatsitf)
 
 define fp128 @conv_LD_short(i16 signext %a) {
 entry:
@@ -69,8 +75,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_int:
-; CHECK: ld $25, %call16(__floatsitf)
+; ALL-LABEL: conv_LD_int:
+; ALL: ld $25, %call16(__floatsitf)
 
 define fp128 @conv_LD_int(i32 %a) {
 entry:
@@ -78,8 +84,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_LL:
-; CHECK: ld $25, %call16(__floatditf)
+; ALL-LABEL: conv_LD_LL:
+; ALL: ld $25, %call16(__floatditf)
 
 define fp128 @conv_LD_LL(i64 %a) {
 entry:
@@ -87,8 +93,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_UChar:
-; CHECK: ld $25, %call16(__floatunsitf)
+; ALL-LABEL: conv_LD_UChar:
+; ALL: ld $25, %call16(__floatunsitf)
 
 define fp128 @conv_LD_UChar(i8 zeroext %a) {
 entry:
@@ -96,8 +102,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_UShort:
-; CHECK: ld $25, %call16(__floatunsitf)
+; ALL-LABEL: conv_LD_UShort:
+; ALL: ld $25, %call16(__floatunsitf)
 
 define fp128 @conv_LD_UShort(i16 zeroext %a) {
 entry:
@@ -105,8 +111,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_UInt:
-; CHECK: ld $25, %call16(__floatunsitf)
+; ALL-LABEL: conv_LD_UInt:
+; ALL: ld $25, %call16(__floatunsitf)
 
 define fp128 @conv_LD_UInt(i32 %a) {
 entry:
@@ -114,8 +120,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_ULL:
-; CHECK: ld $25, %call16(__floatunditf)
+; ALL-LABEL: conv_LD_ULL:
+; ALL: ld $25, %call16(__floatunditf)
 
 define fp128 @conv_LD_ULL(i64 %a) {
 entry:
@@ -123,8 +129,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_char_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_char_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define signext i8 @conv_char_LD(fp128 %a) {
 entry:
@@ -132,8 +138,8 @@ entry:
   ret i8 %conv
 }
 
-; CHECK-LABEL: conv_short_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_short_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define signext i16 @conv_short_LD(fp128 %a) {
 entry:
@@ -141,8 +147,8 @@ entry:
   ret i16 %conv
 }
 
-; CHECK-LABEL: conv_int_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_int_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define i32 @conv_int_LD(fp128 %a) {
 entry:
@@ -150,8 +156,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: conv_LL_LD:
-; CHECK: ld $25, %call16(__fixtfdi)
+; ALL-LABEL: conv_LL_LD:
+; ALL: ld $25, %call16(__fixtfdi)
 
 define i64 @conv_LL_LD(fp128 %a) {
 entry:
@@ -159,8 +165,8 @@ entry:
   ret i64 %conv
 }
 
-; CHECK-LABEL: conv_UChar_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_UChar_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define zeroext i8 @conv_UChar_LD(fp128 %a) {
 entry:
@@ -168,8 +174,8 @@ entry:
   ret i8 %conv
 }
 
-; CHECK-LABEL: conv_UShort_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_UShort_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define zeroext i16 @conv_UShort_LD(fp128 %a) {
 entry:
@@ -177,8 +183,8 @@ entry:
   ret i16 %conv
 }
 
-; CHECK-LABEL: conv_UInt_LD:
-; CHECK: ld $25, %call16(__fixunstfsi)
+; ALL-LABEL: conv_UInt_LD:
+; ALL: ld $25, %call16(__fixunstfsi)
 
 define i32 @conv_UInt_LD(fp128 %a) {
 entry:
@@ -186,8 +192,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: conv_ULL_LD:
-; CHECK: ld $25, %call16(__fixunstfdi)
+; ALL-LABEL: conv_ULL_LD:
+; ALL: ld $25, %call16(__fixunstfdi)
 
 define i64 @conv_ULL_LD(fp128 %a) {
 entry:
@@ -195,8 +201,8 @@ entry:
   ret i64 %conv
 }
 
-; CHECK-LABEL: conv_LD_float:
-; CHECK: ld $25, %call16(__extendsftf2)
+; ALL-LABEL: conv_LD_float:
+; ALL: ld $25, %call16(__extendsftf2)
 
 define fp128 @conv_LD_float(float %a) {
 entry:
@@ -204,8 +210,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_double:
-; CHECK: ld $25, %call16(__extenddftf2)
+; ALL-LABEL: conv_LD_double:
+; ALL: ld $25, %call16(__extenddftf2)
 
 define fp128 @conv_LD_double(double %a) {
 entry:
@@ -213,8 +219,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_float_LD:
-; CHECK: ld $25, %call16(__trunctfsf2)
+; ALL-LABEL: conv_float_LD:
+; ALL: ld $25, %call16(__trunctfsf2)
 
 define float @conv_float_LD(fp128 %a) {
 entry:
@@ -222,8 +228,8 @@ entry:
   ret float %conv
 }
 
-; CHECK-LABEL: conv_double_LD:
-; CHECK: ld $25, %call16(__trunctfdf2)
+; ALL-LABEL: conv_double_LD:
+; ALL: ld $25, %call16(__trunctfdf2)
 
 define double @conv_double_LD(fp128 %a) {
 entry:
@@ -231,13 +237,13 @@ entry:
   ret double %conv
 }
 
-; CHECK-LABEL:             libcall1_fabsl:
-; CHECK-DAG: ld      $[[R0:[0-9]+]], 8($[[R4:[0-9]+]])
-; CHECK-DAG: daddiu  $[[R1:[0-9]+]], $zero, 1
-; CHECK-DAG: dsll    $[[R2:[0-9]+]], $[[R1]], 63
-; CHECK-DAG: daddiu  $[[R3:[0-9]+]], $[[R2]], -1
-; CHECK-DAG: and     $4, $[[R0]], $[[R3]]
-; CHECK-DAG: ld      $2, 0($[[R4]])
+; ALL-LABEL:             libcall1_fabsl:
+; ALL-DAG: ld      $[[R0:[0-9]+]], 8($[[R4:[0-9]+]])
+; ALL-DAG: daddiu  $[[R1:[0-9]+]], $zero, 1
+; ALL-DAG: dsll    $[[R2:[0-9]+]], $[[R1]], 63
+; ALL-DAG: daddiu  $[[R3:[0-9]+]], $[[R2]], -1
+; ALL-DAG: and     $4, $[[R0]], $[[R3]]
+; ALL-DAG: ld      $2, 0($[[R4]])
 
 define fp128 @libcall1_fabsl() {
 entry:
@@ -248,8 +254,8 @@ entry:
 
 declare fp128 @fabsl(fp128) #1
 
-; CHECK-LABEL: libcall1_ceill:
-; CHECK: ld $25, %call16(ceill)
+; ALL-LABEL: libcall1_ceill:
+; ALL: ld $25, %call16(ceill)
 
 define fp128 @libcall1_ceill() {
 entry:
@@ -260,8 +266,8 @@ entry:
 
 declare fp128 @ceill(fp128) #1
 
-; CHECK-LABEL: libcall1_sinl:
-; CHECK: ld $25, %call16(sinl)
+; ALL-LABEL: libcall1_sinl:
+; ALL: ld $25, %call16(sinl)
 
 define fp128 @libcall1_sinl() {
 entry:
@@ -272,8 +278,8 @@ entry:
 
 declare fp128 @sinl(fp128) #2
 
-; CHECK-LABEL: libcall1_cosl:
-; CHECK: ld $25, %call16(cosl)
+; ALL-LABEL: libcall1_cosl:
+; ALL: ld $25, %call16(cosl)
 
 define fp128 @libcall1_cosl() {
 entry:
@@ -284,8 +290,8 @@ entry:
 
 declare fp128 @cosl(fp128) #2
 
-; CHECK-LABEL: libcall1_expl:
-; CHECK: ld $25, %call16(expl)
+; ALL-LABEL: libcall1_expl:
+; ALL: ld $25, %call16(expl)
 
 define fp128 @libcall1_expl() {
 entry:
@@ -296,8 +302,8 @@ entry:
 
 declare fp128 @expl(fp128) #2
 
-; CHECK-LABEL: libcall1_exp2l:
-; CHECK: ld $25, %call16(exp2l)
+; ALL-LABEL: libcall1_exp2l:
+; ALL: ld $25, %call16(exp2l)
 
 define fp128 @libcall1_exp2l() {
 entry:
@@ -308,8 +314,8 @@ entry:
 
 declare fp128 @exp2l(fp128) #2
 
-; CHECK-LABEL: libcall1_logl:
-; CHECK: ld $25, %call16(logl)
+; ALL-LABEL: libcall1_logl:
+; ALL: ld $25, %call16(logl)
 
 define fp128 @libcall1_logl() {
 entry:
@@ -320,8 +326,8 @@ entry:
 
 declare fp128 @logl(fp128) #2
 
-; CHECK-LABEL: libcall1_log2l:
-; CHECK: ld $25, %call16(log2l)
+; ALL-LABEL: libcall1_log2l:
+; ALL: ld $25, %call16(log2l)
 
 define fp128 @libcall1_log2l() {
 entry:
@@ -332,8 +338,8 @@ entry:
 
 declare fp128 @log2l(fp128) #2
 
-; CHECK-LABEL: libcall1_log10l:
-; CHECK: ld $25, %call16(log10l)
+; ALL-LABEL: libcall1_log10l:
+; ALL: ld $25, %call16(log10l)
 
 define fp128 @libcall1_log10l() {
 entry:
@@ -344,8 +350,8 @@ entry:
 
 declare fp128 @log10l(fp128) #2
 
-; CHECK-LABEL: libcall1_nearbyintl:
-; CHECK: ld $25, %call16(nearbyintl)
+; ALL-LABEL: libcall1_nearbyintl:
+; ALL: ld $25, %call16(nearbyintl)
 
 define fp128 @libcall1_nearbyintl() {
 entry:
@@ -356,8 +362,8 @@ entry:
 
 declare fp128 @nearbyintl(fp128) #1
 
-; CHECK-LABEL: libcall1_floorl:
-; CHECK: ld $25, %call16(floorl)
+; ALL-LABEL: libcall1_floorl:
+; ALL: ld $25, %call16(floorl)
 
 define fp128 @libcall1_floorl() {
 entry:
@@ -368,8 +374,8 @@ entry:
 
 declare fp128 @floorl(fp128) #1
 
-; CHECK-LABEL: libcall1_sqrtl:
-; CHECK: ld $25, %call16(sqrtl)
+; ALL-LABEL: libcall1_sqrtl:
+; ALL: ld $25, %call16(sqrtl)
 
 define fp128 @libcall1_sqrtl() {
 entry:
@@ -380,8 +386,8 @@ entry:
 
 declare fp128 @sqrtl(fp128) #2
 
-; CHECK-LABEL: libcall1_rintl:
-; CHECK: ld $25, %call16(rintl)
+; ALL-LABEL: libcall1_rintl:
+; ALL: ld $25, %call16(rintl)
 
 define fp128 @libcall1_rintl() {
 entry:
@@ -392,8 +398,8 @@ entry:
 
 declare fp128 @rintl(fp128) #1
 
-; CHECK-LABEL: libcall_powil:
-; CHECK: ld $25, %call16(__powitf2)
+; ALL-LABEL: libcall_powil:
+; ALL: ld $25, %call16(__powitf2)
 
 define fp128 @libcall_powil(fp128 %a, i32 %b) {
 entry:
@@ -403,18 +409,18 @@ entry:
 
 declare fp128 @llvm.powi.f128(fp128, i32) #3
 
-; CHECK-LABEL:     libcall2_copysignl:
-; CHECK-DAG: daddiu $[[R2:[0-9]+]], $zero, 1
-; CHECK-DAG: dsll   $[[R3:[0-9]+]], $[[R2]], 63
-; CHECK-DAG: ld     $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK-DAG: ld     $[[R1:[0-9]+]], 8($[[R0]])
-; CHECK-DAG: and    $[[R4:[0-9]+]], $[[R1]], $[[R3]]
-; CHECK-DAG: ld     $[[R5:[0-9]+]], %got_disp(gld0)
-; CHECK-DAG: ld     $[[R6:[0-9]+]], 8($[[R5]])
-; CHECK-DAG: daddiu $[[R7:[0-9]+]], $[[R3]], -1
-; CHECK-DAG: and    $[[R8:[0-9]+]], $[[R6]], $[[R7]]
-; CHECK-DAG: or     $4, $[[R8]], $[[R4]]
-; CHECK-DAG: ld     $2, 0($[[R5]])
+; ALL-LABEL:     libcall2_copysignl:
+; ALL-DAG: daddiu $[[R2:[0-9]+]], $zero, 1
+; ALL-DAG: dsll   $[[R3:[0-9]+]], $[[R2]], 63
+; ALL-DAG: ld     $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL-DAG: ld     $[[R1:[0-9]+]], 8($[[R0]])
+; ALL-DAG: and    $[[R4:[0-9]+]], $[[R1]], $[[R3]]
+; ALL-DAG: ld     $[[R5:[0-9]+]], %got_disp(gld0)
+; ALL-DAG: ld     $[[R6:[0-9]+]], 8($[[R5]])
+; ALL-DAG: daddiu $[[R7:[0-9]+]], $[[R3]], -1
+; ALL-DAG: and    $[[R8:[0-9]+]], $[[R6]], $[[R7]]
+; ALL-DAG: or     $4, $[[R8]], $[[R4]]
+; ALL-DAG: ld     $2, 0($[[R5]])
 
 define fp128 @libcall2_copysignl() {
 entry:
@@ -426,8 +432,8 @@ entry:
 
 declare fp128 @copysignl(fp128, fp128) #1
 
-; CHECK-LABEL: libcall2_powl:
-; CHECK: ld $25, %call16(powl)
+; ALL-LABEL: libcall2_powl:
+; ALL: ld $25, %call16(powl)
 
 define fp128 @libcall2_powl() {
 entry:
@@ -439,8 +445,8 @@ entry:
 
 declare fp128 @powl(fp128, fp128) #2
 
-; CHECK-LABEL: libcall2_fmodl:
-; CHECK: ld $25, %call16(fmodl)
+; ALL-LABEL: libcall2_fmodl:
+; ALL: ld $25, %call16(fmodl)
 
 define fp128 @libcall2_fmodl() {
 entry:
@@ -452,8 +458,8 @@ entry:
 
 declare fp128 @fmodl(fp128, fp128) #2
 
-; CHECK-LABEL: libcall3_fmal:
-; CHECK: ld $25, %call16(fmal)
+; ALL-LABEL: libcall3_fmal:
+; ALL: ld $25, %call16(fmal)
 
 define fp128 @libcall3_fmal() {
 entry:
@@ -466,8 +472,8 @@ entry:
 
 declare fp128 @llvm.fma.f128(fp128, fp128, fp128) #4
 
-; CHECK-LABEL: cmp_lt:
-; CHECK: ld $25, %call16(__lttf2)
+; ALL-LABEL: cmp_lt:
+; ALL: ld $25, %call16(__lttf2)
 
 define i32 @cmp_lt(fp128 %a, fp128 %b) {
 entry:
@@ -476,8 +482,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_le:
-; CHECK: ld $25, %call16(__letf2)
+; ALL-LABEL: cmp_le:
+; ALL: ld $25, %call16(__letf2)
 
 define i32 @cmp_le(fp128 %a, fp128 %b) {
 entry:
@@ -486,8 +492,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_gt:
-; CHECK: ld $25, %call16(__gttf2)
+; ALL-LABEL: cmp_gt:
+; ALL: ld $25, %call16(__gttf2)
 
 define i32 @cmp_gt(fp128 %a, fp128 %b) {
 entry:
@@ -496,8 +502,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_ge:
-; CHECK: ld $25, %call16(__getf2)
+; ALL-LABEL: cmp_ge:
+; ALL: ld $25, %call16(__getf2)
 
 define i32 @cmp_ge(fp128 %a, fp128 %b) {
 entry:
@@ -506,8 +512,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_eq:
-; CHECK: ld $25, %call16(__eqtf2)
+; ALL-LABEL: cmp_eq:
+; ALL: ld $25, %call16(__eqtf2)
 
 define i32 @cmp_eq(fp128 %a, fp128 %b) {
 entry:
@@ -516,8 +522,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_ne:
-; CHECK: ld $25, %call16(__netf2)
+; ALL-LABEL: cmp_ne:
+; ALL: ld $25, %call16(__netf2)
 
 define i32 @cmp_ne(fp128 %a, fp128 %b) {
 entry:
@@ -526,10 +532,10 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: load_LD_LD:
-; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK: ld $2, 0($[[R0]])
-; CHECK: ld $4, 8($[[R0]])
+; ALL-LABEL: load_LD_LD:
+; ALL: ld $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL: ld $2, 0($[[R0]])
+; ALL: ld $4, 8($[[R0]])
 
 define fp128 @load_LD_LD() {
 entry:
@@ -537,11 +543,11 @@ entry:
   ret fp128 %0
 }
 
-; CHECK-LABEL: load_LD_float:
-; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gf1)
-; CHECK: lw   $4, 0($[[R0]])
-; CHECK: ld   $25, %call16(__extendsftf2)
-; CHECK: jalr $25
+; ALL-LABEL: load_LD_float:
+; ALL: ld   $[[R0:[0-9]+]], %got_disp(gf1)
+; ALL: lw   $4, 0($[[R0]])
+; ALL: ld   $25, %call16(__extendsftf2)
+; ALL: jalr $25
 
 define fp128 @load_LD_float() {
 entry:
@@ -550,11 +556,11 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: load_LD_double:
-; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gd1)
-; CHECK: ld   $4, 0($[[R0]])
-; CHECK: ld   $25, %call16(__extenddftf2)
-; CHECK: jalr $25
+; ALL-LABEL: load_LD_double:
+; ALL: ld   $[[R0:[0-9]+]], %got_disp(gd1)
+; ALL: ld   $4, 0($[[R0]])
+; ALL: ld   $25, %call16(__extenddftf2)
+; ALL: jalr $25
 
 define fp128 @load_LD_double() {
 entry:
@@ -563,13 +569,13 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: store_LD_LD:
-; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK: ld $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK: ld $[[R2:[0-9]+]], 8($[[R0]])
-; CHECK: ld $[[R3:[0-9]+]], %got_disp(gld0)
-; CHECK: sd $[[R2]], 8($[[R3]])
-; CHECK: sd $[[R1]], 0($[[R3]])
+; ALL-LABEL: store_LD_LD:
+; ALL: ld $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL: ld $[[R1:[0-9]+]], 0($[[R0]])
+; ALL: ld $[[R2:[0-9]+]], 8($[[R0]])
+; ALL: ld $[[R3:[0-9]+]], %got_disp(gld0)
+; ALL: sd $[[R2]], 8($[[R3]])
+; ALL: sd $[[R1]], 0($[[R3]])
 
 define void @store_LD_LD() {
 entry:
@@ -578,14 +584,14 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: store_LD_float:
-; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK: ld   $4, 0($[[R0]])
-; CHECK: ld   $5, 8($[[R0]])
-; CHECK: ld   $25, %call16(__trunctfsf2)
-; CHECK: jalr $25
-; CHECK: ld   $[[R1:[0-9]+]], %got_disp(gf1)
-; CHECK: sw   $2, 0($[[R1]])
+; ALL-LABEL: store_LD_float:
+; ALL: ld   $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL: ld   $4, 0($[[R0]])
+; ALL: ld   $5, 8($[[R0]])
+; ALL: ld   $25, %call16(__trunctfsf2)
+; ALL: jalr $25
+; ALL: ld   $[[R1:[0-9]+]], %got_disp(gf1)
+; ALL: sw   $2, 0($[[R1]])
 
 define void @store_LD_float() {
 entry:
@@ -595,14 +601,14 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: store_LD_double:
-; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK: ld   $4, 0($[[R0]])
-; CHECK: ld   $5, 8($[[R0]])
-; CHECK: ld   $25, %call16(__trunctfdf2)
-; CHECK: jalr $25
-; CHECK: ld   $[[R1:[0-9]+]], %got_disp(gd1)
-; CHECK: sd   $2, 0($[[R1]])
+; ALL-LABEL: store_LD_double:
+; ALL: ld   $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL: ld   $4, 0($[[R0]])
+; ALL: ld   $5, 8($[[R0]])
+; ALL: ld   $25, %call16(__trunctfdf2)
+; ALL: jalr $25
+; ALL: ld   $[[R1:[0-9]+]], %got_disp(gd1)
+; ALL: sd   $2, 0($[[R1]])
 
 define void @store_LD_double() {
 entry:
@@ -612,11 +618,22 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: select_LD:
-; CHECK: movn $8, $6, $4
-; CHECK: movn $9, $7, $4
-; CHECK: move $2, $8
-; CHECK: move $4, $9
+; ALL-LABEL: select_LD:
+; C_CC_FMT:      movn $8, $6, $4
+; C_CC_FMT:      movn $9, $7, $4
+; C_CC_FMT:      move $2, $8
+; C_CC_FMT:      move $4, $9
+
+; FIXME: This sll works around an implementation detail in the code generator
+;        (setcc's result is i32 so bits 32-63 are undefined). It's not really
+;        needed.
+; CMP_CC_FMT-DAG: sll $[[CC:[0-9]+]], $4, 0
+; CMP_CC_FMT-DAG: seleqz $[[EQ1:[0-9]+]], $8, $[[CC]]
+; CMP_CC_FMT-DAG: selnez $[[NE1:[0-9]+]], $6, $[[CC]]
+; CMP_CC_FMT-DAG: or $2, $[[NE1]], $[[EQ1]]
+; CMP_CC_FMT-DAG: seleqz $[[EQ2:[0-9]+]], $9, $[[CC]]
+; CMP_CC_FMT-DAG: selnez $[[NE2:[0-9]+]], $7, $[[CC]]
+; CMP_CC_FMT-DAG: or $4, $[[NE2]], $[[EQ2]]
 
 define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) {
 entry:
@@ -625,18 +642,27 @@ entry:
   ret fp128 %cond
 }
 
-; CHECK-LABEL: selectCC_LD:
-; CHECK: move $[[R0:[0-9]+]], $11
-; CHECK: move $[[R1:[0-9]+]], $10
-; CHECK: move $[[R2:[0-9]+]], $9
-; CHECK: move $[[R3:[0-9]+]], $8
-; CHECK: ld   $25, %call16(__gttf2)($gp)
-; CHECK: jalr $25
-; CHECK: slti $1, $2, 1
-; CHECK: movz $[[R1]], $[[R3]], $1
-; CHECK: movz $[[R0]], $[[R2]], $1
-; CHECK: move $2, $[[R1]]
-; CHECK: move $4, $[[R0]]
+; ALL-LABEL: selectCC_LD:
+; ALL:           move $[[R0:[0-9]+]], $11
+; ALL:           move $[[R1:[0-9]+]], $10
+; ALL:           move $[[R2:[0-9]+]], $9
+; ALL:           move $[[R3:[0-9]+]], $8
+; ALL:           ld   $25, %call16(__gttf2)($gp)
+; ALL:           jalr $25
+
+; C_CC_FMT:      slti $[[CC:[0-9]+]], $2, 1
+; C_CC_FMT:      movz $[[R1]], $[[R3]], $[[CC]]
+; C_CC_FMT:      movz $[[R0]], $[[R2]], $[[CC]]
+; C_CC_FMT:      move $2, $[[R1]]
+; C_CC_FMT:      move $4, $[[R0]]
+
+; CMP_CC_FMT:    slt $[[CC:[0-9]+]], $zero, $2
+; CMP_CC_FMT:    seleqz $[[EQ1:[0-9]+]], $[[R1]], $[[CC]]
+; CMP_CC_FMT:    selnez $[[NE1:[0-9]+]], $[[R3]], $[[CC]]
+; CMP_CC_FMT:    or $2, $[[NE1]], $[[EQ1]]
+; CMP_CC_FMT:    seleqz $[[EQ2:[0-9]+]], $[[R0]], $[[CC]]
+; CMP_CC_FMT:    selnez $[[NE2:[0-9]+]], $[[R2]], $[[CC]]
+; CMP_CC_FMT:    or $4, $[[NE2]], $[[EQ2]]
 
 define fp128 @selectCC_LD(fp128 %a, fp128 %b, fp128 %c, fp128 %d) {
 entry:
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
deleted file mode 100644
index bbdc05cd2d8f8..0000000000000
--- a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
+++ /dev/null
@@ -1,110 +0,0 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
-
-%struct.S = type <{ [4 x float] }>
-%struct.S2 = type <{ [4 x double] }>
-%struct.S3 = type <{ i8, float }>
-
-@s = external global [4 x %struct.S]
-@gf = external global float
-@gd = external global double
-@s2 = external global [4 x %struct.S2]
-@s3 = external global %struct.S3
-
-define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
-entry:
-; CHECK: lwxc1
-  %idxprom = zext i32 %o to i64
-  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
-  %0 = load float* %arrayidx, align 4
-  ret float %0
-}
-
-define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
-entry:
-; CHECK: ldxc1
-  %idxprom = zext i32 %o to i64
-  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
-  %0 = load double* %arrayidx, align 8
-  ret double %0
-}
-
-define float @foo2(i32 %b, i32 %c) nounwind readonly {
-entry:
-; CHECK-NOT: luxc1
-  %idxprom = zext i32 %c to i64
-  %idxprom1 = zext i32 %b to i64
-  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
-  %0 = load float* %arrayidx2, align 1
-  ret float %0
-}
-
-define void @foo3(float* nocapture %b, i32 %o) nounwind {
-entry:
-; CHECK: swxc1
-  %0 = load float* @gf, align 4
-  %idxprom = zext i32 %o to i64
-  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
-  store float %0, float* %arrayidx, align 4
-  ret void
-}
-
-define void @foo4(double* nocapture %b, i32 %o) nounwind {
-entry:
-; CHECK: sdxc1
-  %0 = load double* @gd, align 8
-  %idxprom = zext i32 %o to i64
-  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
-  store double %0, double* %arrayidx, align 8
-  ret void
-}
-
-define void @foo5(i32 %b, i32 %c) nounwind {
-entry:
-; CHECK-NOT: suxc1
-  %0 = load float* @gf, align 4
-  %idxprom = zext i32 %c to i64
-  %idxprom1 = zext i32 %b to i64
-  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
-  store float %0, float* %arrayidx2, align 1
-  ret void
-}
-
-define double @foo6(i32 %b, i32 %c) nounwind readonly {
-entry:
-; CHECK: foo6
-; CHECK-NOT: luxc1
-  %idxprom = zext i32 %c to i64
-  %idxprom1 = zext i32 %b to i64
-  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
-  %0 = load double* %arrayidx2, align 1
-  ret double %0
-}
-
-define void @foo7(i32 %b, i32 %c) nounwind {
-entry:
-; CHECK: foo7
-; CHECK-NOT: suxc1
-  %0 = load double* @gd, align 8
-  %idxprom = zext i32 %c to i64
-  %idxprom1 = zext i32 %b to i64
-  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
-  store double %0, double* %arrayidx2, align 1
-  ret void
-}
-
-define float @foo8() nounwind readonly {
-entry:
-; CHECK: foo8
-; CHECK-NOT: luxc1
-  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
-  ret float %0
-}
-
-define void @foo9(float %f) nounwind {
-entry:
-; CHECK: foo9
-; CHECK-NOT: suxc1
-  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
-  ret void
-}
-
diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll
index e01609f3b1e4a..7a52c3d41d69e 100644
--- a/test/CodeGen/Mips/mips64-sret.ll
+++ b/test/CodeGen/Mips/mips64-sret.ll
@@ -1,16 +1,23 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O3 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
 
-%struct.S = type { [8 x i32] }
+define void @foo(i32* noalias sret %agg.result) nounwind {
+entry:
+; CHECK-LABEL: foo:
+; CHECK: sw {{.*}}, 0($4)
+; CHECK: jr $ra
+; CHECK-NEXT: move $2, $4
 
-@g = common global %struct.S zeroinitializer, align 4
+  store i32 42, i32* %agg.result
+  ret void
+}
 
-define void @f(%struct.S* noalias sret %agg.result) nounwind {
+define void @bar(i32 %v, i32* noalias sret %agg.result) nounwind {
 entry:
-; CHECK: move $2, $4
+; CHECK-LABEL: bar:
+; CHECK: sw $4, 0($5)
+; CHECK: jr $ra
+; CHECK-NEXT: move $2, $5
 
-  %0 = bitcast %struct.S* %agg.result to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false)
+  store i32 %v, i32* %agg.result
   ret void
 }
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/Mips/mips64countleading.ll b/test/CodeGen/Mips/mips64countleading.ll
deleted file mode 100644
index b2b67e51ade07..0000000000000
--- a/test/CodeGen/Mips/mips64countleading.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
-
-define i64 @t1(i64 %X) nounwind readnone {
-entry:
-; CHECK: dclz
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
-  ret i64 %tmp1
-}
-
-declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
-
-define i64 @t3(i64 %X) nounwind readnone {
-entry:
-; CHECK: dclo 
-  %neg = xor i64 %X, -1
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
-  ret i64 %tmp1
-}
-
diff --git a/test/CodeGen/Mips/mips64directive.ll b/test/CodeGen/Mips/mips64directive.ll
index fa81b729e9c83..3d95f519bc643 100644
--- a/test/CodeGen/Mips/mips64directive.ll
+++ b/test/CodeGen/Mips/mips64directive.ll
@@ -1,3 +1,4 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
 
 @gl = global i64 1250999896321, align 8
diff --git a/test/CodeGen/Mips/mips64ext.ll b/test/CodeGen/Mips/mips64ext.ll
index 02a35f8e6ed75..22ea0eb7769ca 100644
--- a/test/CodeGen/Mips/mips64ext.ll
+++ b/test/CodeGen/Mips/mips64ext.ll
@@ -1,4 +1,5 @@
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s 
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
 
 define i64 @zext64_32(i32 %a) nounwind readnone {
 entry:
diff --git a/test/CodeGen/Mips/mips64fpimm0.ll b/test/CodeGen/Mips/mips64fpimm0.ll
index 17716da0c670d..19e076d1ecdac 100644
--- a/test/CodeGen/Mips/mips64fpimm0.ll
+++ b/test/CodeGen/Mips/mips64fpimm0.ll
@@ -1,3 +1,4 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
 
 define double @foo1() nounwind readnone {
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index 24647b20bf2e0..2f42270b645dd 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -1,5 +1,7 @@
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
 
 @f0 = common global float 0.000000e+00, align 4
 @d0 = common global double 0.000000e+00, align 8
diff --git a/test/CodeGen/Mips/mips64imm.ll b/test/CodeGen/Mips/mips64imm.ll
index 1fc8636c480b7..c3fc61df42ba9 100644
--- a/test/CodeGen/Mips/mips64imm.ll
+++ b/test/CodeGen/Mips/mips64imm.ll
@@ -1,3 +1,4 @@
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s
 ; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
 
 define i32 @foo1() nounwind readnone {
diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll
index 2894d698adcc9..ed617be6532e2 100644
--- a/test/CodeGen/Mips/mips64instrs.ll
+++ b/test/CodeGen/Mips/mips64instrs.ll
@@ -1,98 +1,128 @@
-; RUN: llc -march=mips64el -mcpu=mips64 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips4 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS4 -check-prefix=ACCMULDIV %s
+; RUN: llc -march=mips64el -mcpu=mips64 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HAS-DCLO -check-prefix=ACCMULDIV %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HAS-DCLO -check-prefix=ACCMULDIV %s
+; RUN: llc -march=mips64el -mcpu=mips64r6 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HAS-DCLO -check-prefix=GPRMULDIV %s
 
 @gll0 = common global i64 0, align 8
 @gll1 = common global i64 0, align 8
 
 define i64 @f0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: daddu
+; ALL-LABEL: f0:
+; ALL:           daddu $2, ${{[45]}}, ${{[45]}}
   %add = add nsw i64 %a1, %a0
   ret i64 %add
 }
 
 define i64 @f1(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: dsubu
+; ALL-LABEL: f1:
+; ALL:           dsubu $2, $4, $5
   %sub = sub nsw i64 %a0, %a1
   ret i64 %sub
 }
 
 define i64 @f4(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: and
+; ALL-LABEL: f4:
+; ALL:           and $2, ${{[45]}}, ${{[45]}}
   %and = and i64 %a1, %a0
   ret i64 %and
 }
 
 define i64 @f5(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: or
+; ALL-LABEL: f5:
+; ALL:           or $2, ${{[45]}}, ${{[45]}}
   %or = or i64 %a1, %a0
   ret i64 %or
 }
 
 define i64 @f6(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: xor
+; ALL-LABEL: f6:
+; ALL:           xor $2, ${{[45]}}, ${{[45]}}
   %xor = xor i64 %a1, %a0
   ret i64 %xor
 }
 
 define i64 @f7(i64 %a0) nounwind readnone {
 entry:
-; CHECK: daddiu ${{[0-9]+}}, ${{[0-9]+}}, 20
+; ALL-LABEL: f7:
+; ALL:           daddiu $2, $4, 20
   %add = add nsw i64 %a0, 20
   ret i64 %add
 }
 
 define i64 @f8(i64 %a0) nounwind readnone {
 entry:
-; CHECK: daddiu ${{[0-9]+}}, ${{[0-9]+}}, -20
+; ALL-LABEL: f8:
+; ALL:           daddiu $2, $4, -20
   %sub = add nsw i64 %a0, -20
   ret i64 %sub
 }
 
 define i64 @f9(i64 %a0) nounwind readnone {
 entry:
-; CHECK: andi ${{[0-9]+}}, ${{[0-9]+}}, 20
+; ALL-LABEL: f9:
+; ALL:           andi $2, $4, 20
   %and = and i64 %a0, 20
   ret i64 %and
 }
 
 define i64 @f10(i64 %a0) nounwind readnone {
 entry:
-; CHECK: ori ${{[0-9]+}}, ${{[0-9]+}}, 20
+; ALL-LABEL: f10:
+; ALL:           ori $2, $4, 20
   %or = or i64 %a0, 20
   ret i64 %or
 }
 
 define i64 @f11(i64 %a0) nounwind readnone {
 entry:
-; CHECK: xori ${{[0-9]+}}, ${{[0-9]+}}, 20
+; ALL-LABEL: f11:
+; ALL:           xori $2, $4, 20
   %xor = xor i64 %a0, 20
   ret i64 %xor
 }
 
 define i64 @f12(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK: mult
+; ALL-LABEL: f12:
+
+; ACCMULDIV:     mult ${{[45]}}, ${{[45]}}
+; GPRMULDIV:     dmul $2, ${{[45]}}, ${{[45]}}
+
   %mul = mul nsw i64 %b, %a
   ret i64 %mul
 }
 
 define i64 @f13(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK: mult
+; ALL-LABEL: f13:
+
+; ACCMULDIV:     mult ${{[45]}}, ${{[45]}}
+; GPRMULDIV:     dmul $2, ${{[45]}}, ${{[45]}}
+
   %mul = mul i64 %b, %a
   ret i64 %mul
 }
 
 define i64 @f14(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK-LABEL: f14:
-; CHECK: ddiv $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; CHECK: teq $[[R0]], $zero, 7
-; CHECK: mflo
+; ALL-LABEL: f14:
+; ALL-DAG:       ld $[[P0:[0-9]+]], %got_disp(gll0)(
+; ALL-DAG:       ld $[[P1:[0-9]+]], %got_disp(gll1)(
+; ALL-DAG:       ld $[[T0:[0-9]+]], 0($[[P0]])
+; ALL-DAG:       ld $[[T1:[0-9]+]], 0($[[P1]])
+
+; ACCMULDIV:     ddiv $zero, $[[T0]], $[[T1]]
+; ACCMULDIV:     teq $[[T1]], $zero, 7
+; ACCMULDIV:     mflo $2
+
+; GPRMULDIV:     ddiv $2, $[[T0]], $[[T1]]
+; GPRMULDIV:     teq $[[T1]], $zero, 7
+
   %0 = load i64* @gll0, align 8
   %1 = load i64* @gll1, align 8
   %div = sdiv i64 %0, %1
@@ -101,10 +131,19 @@ entry:
 
 define i64 @f15() nounwind readnone {
 entry:
-; CHECK-LABEL: f15:
-; CHECK: ddivu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; CHECK: teq $[[R0]], $zero, 7
-; CHECK: mflo
+; ALL-LABEL: f15:
+; ALL-DAG:       ld $[[P0:[0-9]+]], %got_disp(gll0)(
+; ALL-DAG:       ld $[[P1:[0-9]+]], %got_disp(gll1)(
+; ALL-DAG:       ld $[[T0:[0-9]+]], 0($[[P0]])
+; ALL-DAG:       ld $[[T1:[0-9]+]], 0($[[P1]])
+
+; ACCMULDIV:     ddivu $zero, $[[T0]], $[[T1]]
+; ACCMULDIV:     teq $[[T1]], $zero, 7
+; ACCMULDIV:     mflo $2
+
+; GPRMULDIV:     ddivu $2, $[[T0]], $[[T1]]
+; GPRMULDIV:     teq $[[T1]], $zero, 7
+
   %0 = load i64* @gll0, align 8
   %1 = load i64* @gll1, align 8
   %div = udiv i64 %0, %1
@@ -113,20 +152,30 @@ entry:
 
 define i64 @f16(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK-LABEL: f16:
-; CHECK: ddiv $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; CHECK: teq $[[R0]], $zero, 7
-; CHECK: mfhi
+; ALL-LABEL: f16:
+
+; ACCMULDIV:     ddiv $zero, $4, $5
+; ACCMULDIV:     teq $5, $zero, 7
+; ACCMULDIV:     mfhi $2
+
+; GPRMULDIV:     dmod $2, $4, $5
+; GPRMULDIV:     teq $5, $zero, 7
+
   %rem = srem i64 %a, %b
   ret i64 %rem
 }
 
 define i64 @f17(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK-LABEL: f17:
-; CHECK: ddivu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; CHECK: teq $[[R0]], $zero, 7
-; CHECK: mfhi
+; ALL-LABEL: f17:
+
+; ACCMULDIV:     ddivu $zero, $4, $5
+; ACCMULDIV:     teq $5, $zero, 7
+; ACCMULDIV:     mfhi $2
+
+; GPRMULDIV:     dmodu $2, $4, $5
+; GPRMULDIV:     teq $5, $zero, 7
+
   %rem = urem i64 %a, %b
   ret i64 %rem
 }
@@ -135,14 +184,26 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
 
 define i64 @f18(i64 %X) nounwind readnone {
 entry:
-; CHECK: dclz $2, $4
+; ALL-LABEL: f18:
+
+; The MIPS4 version is too long to reasonably test. At least check we don't get dclz
+; MIPS4-NOT:     dclz
+
+; HAS-DCLO:      dclz $2, $4
+
   %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
   ret i64 %tmp1
 }
 
 define i64 @f19(i64 %X) nounwind readnone {
 entry:
-; CHECK: dclo $2, $4
+; ALL-LABEL: f19:
+
+; The MIPS4 version is too long to reasonably test. At least check we don't get dclo
+; MIPS4-NOT:     dclo
+
+; HAS-DCLO:      dclo $2, $4
+
   %neg = xor i64 %X, -1
   %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
   ret i64 %tmp1
@@ -150,7 +211,8 @@ entry:
 
 define i64 @f20(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK: nor
+; ALL-LABEL: f20:
+; ALL:           nor $2, ${{[45]}}, ${{[45]}}
   %or = or i64 %b, %a
   %neg = xor i64 %or, -1
   ret i64 %neg
diff --git a/test/CodeGen/Mips/mips64intldst.ll b/test/CodeGen/Mips/mips64intldst.ll
index 0e310a8670f91..c3607baeefeb9 100644
--- a/test/CodeGen/Mips/mips64intldst.ll
+++ b/test/CodeGen/Mips/mips64intldst.ll
@@ -1,5 +1,7 @@
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
 
 @c = common global i8 0, align 4
 @s = common global i16 0, align 4
diff --git a/test/CodeGen/Mips/mips64lea.ll b/test/CodeGen/Mips/mips64lea.ll
index 54d504f92266e..e866b217a59ce 100644
--- a/test/CodeGen/Mips/mips64lea.ll
+++ b/test/CodeGen/Mips/mips64lea.ll
@@ -1,3 +1,4 @@
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s
 ; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
 
 define void @foo3() nounwind {
diff --git a/test/CodeGen/Mips/mips64load-store-left-right.ll b/test/CodeGen/Mips/mips64load-store-left-right.ll
deleted file mode 100644
index 4561429ad8b98..0000000000000
--- a/test/CodeGen/Mips/mips64load-store-left-right.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck  -check-prefix=EL %s
-; RUN: llc -march=mips64 -mcpu=mips64 -mattr=n64 < %s | FileCheck  -check-prefix=EB %s
-
-%struct.SLL = type { i64 }
-%struct.SI = type { i32 }
-%struct.SUI = type { i32 }
-
-@sll = common global %struct.SLL zeroinitializer, align 1
-@si = common global %struct.SI zeroinitializer, align 1
-@sui = common global %struct.SUI zeroinitializer, align 1
-
-define i64 @foo_load_ll() nounwind readonly {
-entry:
-; EL: ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
-; EL: ldr $[[R0]], 0($[[R1]])
-; EB: ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: ldr $[[R0]], 7($[[R1]])
-
-  %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
-  ret i64 %0
-}
-
-define i64 @foo_load_i() nounwind readonly {
-entry:
-; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: lwr $[[R0]], 0($[[R1]])
-; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: lwr $[[R0]], 3($[[R1]])
-
-  %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
-  %conv = sext i32 %0 to i64
-  ret i64 %conv
-}
-
-define i64 @foo_load_ui() nounwind readonly {
-entry:
-; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: lwr $[[R0]], 0($[[R1]])
-; EL: daddiu $[[R2:[0-9]+]], $zero, 1
-; EL: dsll   $[[R3:[0-9]+]], $[[R2]], 32
-; EL: daddiu $[[R4:[0-9]+]], $[[R3]], -1
-; EL: and    ${{[0-9]+}}, $[[R0]], $[[R4]]
-; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: lwr $[[R0]], 3($[[R1]])
-
-
-  %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
-  %conv = zext i32 %0 to i64
-  ret i64 %conv
-}
-
-define void @foo_store_ll(i64 %a) nounwind {
-entry:
-; EL: sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
-; EL: sdr $[[R0]], 0($[[R1]])
-; EB: sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: sdr $[[R0]], 7($[[R1]])
-
-  store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
-  ret void
-}
-
-define void @foo_store_i(i32 %a) nounwind {
-entry:
-; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: swr $[[R0]], 0($[[R1]])
-; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: swr $[[R0]], 3($[[R1]])
-
-  store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
-  ret void
-}
-
diff --git a/test/CodeGen/Mips/mips64muldiv.ll b/test/CodeGen/Mips/mips64muldiv.ll
index fd036a2ca9fbf..32d05a9da369d 100644
--- a/test/CodeGen/Mips/mips64muldiv.ll
+++ b/test/CodeGen/Mips/mips64muldiv.ll
@@ -1,49 +1,79 @@
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC
+; RUN: llc -march=mips64el -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC
+; RUN: llc -march=mips64el -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR
+
+; FileCheck prefixes:
+;   ALL - All targets
+;   ACC - Targets with accumulator based mul/div (i.e. pre-MIPS32r6)
+;   GPR - Targets with register based mul/div (i.e. MIPS32r6)
 
 define i64 @m0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: dmult
-; CHECK: mflo
+; ALL-LABEL: m0:
+; ACC:           dmult ${{[45]}}, ${{[45]}}
+; ACC:           mflo $2
+; GPR:           dmul $2, ${{[45]}}, ${{[45]}}
   %mul = mul i64 %a1, %a0
   ret i64 %mul
 }
 
 define i64 @m1(i64 %a) nounwind readnone {
 entry:
-; CHECK: dmult
-; CHECK: mfhi
+; ALL-LABEL: m1:
+; ALL:           lui $[[T0:[0-9]+]], 21845
+; ALL:           addiu $[[T0]], $[[T0]], 21845
+; ALL:           dsll $[[T0]], $[[T0]], 16
+; ALL:           addiu $[[T0]], $[[T0]], 21845
+; ALL:           dsll $[[T0]], $[[T0]], 16
+; ALL:           addiu $[[T0]], $[[T0]], 21846
+
+; ACC:           dmult $4, $[[T0]]
+; ACC:           mfhi $[[T1:[0-9]+]]
+; GPR:           dmuh $[[T1:[0-9]+]], $4, $[[T0]]
+
+; ALL:           dsrl $2, $[[T1]], 63
+; ALL:           daddu $2, $[[T1]], $2
   %div = sdiv i64 %a, 3
   ret i64 %div
 }
 
 define i64 @d0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: ddivu
-; CHECK: mflo
+; ALL-LABEL: d0:
+; ACC:           ddivu $zero, $4, $5
+; ACC:           mflo $2
+; GPR:           ddivu $2, $4, $5
   %div = udiv i64 %a0, %a1
   ret i64 %div
 }
 
 define i64 @d1(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: ddiv
-; CHECK: mflo
+; ALL-LABEL: d1:
+; ACC:           ddiv $zero, $4, $5
+; ACC:           mflo $2
+; GPR:           ddiv $2, $4, $5
   %div = sdiv i64 %a0, %a1
   ret i64 %div
 }
 
 define i64 @d2(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: ddivu
-; CHECK: mfhi
+; ALL-LABEL: d2:
+; ACC:           ddivu $zero, $4, $5
+; ACC:           mfhi $2
+; GPR:           dmodu $2, $4, $5
   %rem = urem i64 %a0, %a1
   ret i64 %rem
 }
 
 define i64 @d3(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: ddiv
-; CHECK: mfhi
+; ALL-LABEL: d3:
+; ACC:           ddiv $zero, $4, $5
+; ACC:           mfhi $2
+; GPR:           dmod $2, $4, $5
   %rem = srem i64 %a0, %a1
   ret i64 %rem
 }
diff --git a/test/CodeGen/Mips/mips64r6/compatibility.ll b/test/CodeGen/Mips/mips64r6/compatibility.ll
new file mode 100644
index 0000000000000..429f68d784bbf
--- /dev/null
+++ b/test/CodeGen/Mips/mips64r6/compatibility.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=mipsel -mcpu=mips64r6 < %s | FileCheck %s
+; RUN: not llc -march=mipsel -mcpu=mips64r6 -mattr=+dsp < %s 2>&1 | FileCheck --check-prefix=DSP %s
+
+; CHECK: foo:
+; DSP: MIPS64r6 is not compatible with the DSP ASE
+
+define void @foo() nounwind {
+  ret void
+}
diff --git a/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
index f4854f880542d..db653eadf2f76 100644
--- a/test/CodeGen/Mips/mno-ldc1-sdc1.ll
+++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
@@ -1,33 +1,113 @@
-; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 -mcpu=mips32r2 \
-; RUN: < %s | FileCheck %s -check-prefix=LE-PIC
-; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 < %s | \
-; RUN: FileCheck %s -check-prefix=LE-STATIC
-; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 < %s | \
-; RUN: FileCheck %s -check-prefix=BE-PIC
+; Check that [sl]dc1 are normally emitted. MIPS32r2 should have [sl]dxc1 too.
+; RUN: llc -march=mipsel -mcpu=mips32   < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R1-LDC1
 ; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | \
-; RUN: FileCheck %s -check-prefix=CHECK-LDC1-SDC1
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R2-LDXC1
+; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R6-LDC1
+
+; Check that -mno-ldc1-sdc1 disables [sl]dc1
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32   < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R1 \
+; RUN:             -check-prefix=32R1-LE -check-prefix=32R1-LE-PIC
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r2 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R2 \
+; RUN:             -check-prefix=32R2-LE -check-prefix=32R2-LE-PIC
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r6 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R6 \
+; RUN:             -check-prefix=32R6-LE -check-prefix=32R6-LE-PIC
+
+; Check again for big-endian
+; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32   < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R1 \
+; RUN:             -check-prefix=32R1-BE -check-prefix=32R1-BE-PIC
+; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r2 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R2 \
+; RUN:             -check-prefix=32R2-BE -check-prefix=32R2-BE-PIC
+; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r6 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R6 \
+; RUN:             -check-prefix=32R6-BE -check-prefix=32R6-BE-PIC
+
+; Check again for the static relocation model
+; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32   < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R1 \
+; RUN:             -check-prefix=32R1-LE -check-prefix=32R1-LE-STATIC
+; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r2 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R2 \
+; RUN:             -check-prefix=32R2-LE -check-prefix=32R2-LE-STATIC
+; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r6 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R6 \
+; RUN:             -check-prefix=32R6-LE -check-prefix=32R6-LE-STATIC
 
 @g0 = common global double 0.000000e+00, align 8
 
-; LE-PIC-LABEL: test_ldc1:
-; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
-; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
-; LE-PIC-DAG: mtc1 $[[R0]], $f0
-; LE-PIC-DAG: mtc1 $[[R1]], $f1
-; LE-STATIC-LABEL: test_ldc1:
-; LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
-; LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
-; LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
-; LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
-; LE-STATIC-DAG: mtc1 $[[R1]], $f0
-; LE-STATIC-DAG: mtc1 $[[R3]], $f1
-; BE-PIC-LABEL: test_ldc1:
-; BE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
-; BE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
-; BE-PIC-DAG: mtc1 $[[R1]], $f0
-; BE-PIC-DAG: mtc1 $[[R0]], $f1
-; CHECK-LDC1-SDC1-LABEL: test_ldc1:
-; CHECK-LDC1-SDC1: ldc1 $f{{[0-9]+}}
+; ALL-LABEL: test_ldc1:
+
+; 32R1-LE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R1-LE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R1-LE-PIC-DAG:    mtc1 $[[R0]], $f0
+; 32R1-LE-PIC-DAG:    mtc1 $[[R1]], $f1
+
+; 32R2-LE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R2-LE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R2-LE-PIC-DAG:    mtc1 $[[R0]], $f0
+; 32R2-LE-PIC-DAG:    mthc1 $[[R1]], $f0
+
+; 32R6-LE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-LE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-LE-PIC-DAG:    mtc1 $[[R0]], $f0
+; 32R6-LE-PIC-DAG:    mthc1 $[[R1]], $f0
+
+; 32R1-LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
+; 32R1-LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
+; 32R1-LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
+; 32R1-LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
+; 32R1-LE-STATIC-DAG: mtc1 $[[R1]], $f0
+; 32R1-LE-STATIC-DAG: mtc1 $[[R3]], $f1
+
+; 32R2-LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
+; 32R2-LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
+; 32R2-LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
+; 32R2-LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
+; 32R2-LE-STATIC-DAG: mtc1 $[[R1]], $f0
+; 32R2-LE-STATIC-DAG: mthc1 $[[R3]], $f0
+
+; 32R6-LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
+; 32R6-LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
+; 32R6-LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
+; 32R6-LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
+; 32R6-LE-STATIC-DAG: mtc1 $[[R1]], $f0
+; 32R6-LE-STATIC-DAG: mthc1 $[[R3]], $f0
+
+; 32R1-BE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R1-BE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R1-BE-PIC-DAG:    mtc1 $[[R1]], $f0
+; 32R1-BE-PIC-DAG:    mtc1 $[[R0]], $f1
+
+; 32R2-BE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R2-BE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R2-BE-PIC-DAG:    mtc1 $[[R1]], $f0
+; 32R2-BE-PIC-DAG:    mthc1 $[[R0]], $f0
+
+; 32R6-BE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-BE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-BE-PIC-DAG:    mtc1 $[[R1]], $f0
+; 32R6-BE-PIC-DAG:    mthc1 $[[R0]], $f0
+
+; 32R1-LDC1:          ldc1 $f0, 0(${{[0-9]+}})
+
+; 32R2-LDXC1:         ldc1 $f0, 0(${{[0-9]+}})
+
+; 32R6-LDC1:          ldc1 $f0, 0(${{[0-9]+}})
 
 define double @test_ldc1() {
 entry:
@@ -35,25 +115,64 @@ entry:
   ret double %0
 }
 
-; LE-PIC-LABEL: test_sdc1:
-; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
-; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
-; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
-; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
-; LE-STATIC-LABEL: test_sdc1:
-; LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
-; LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
-; LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
-; LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
-; LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
-; LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
-; BE-PIC-LABEL: test_sdc1:
-; BE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
-; BE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
-; BE-PIC-DAG: sw $[[R1]], 0(${{[0-9]+}})
-; BE-PIC-DAG: sw $[[R0]], 4(${{[0-9]+}})
-; CHECK-LDC1-SDC1-LABEL: test_sdc1:
-; CHECK-LDC1-SDC1: sdc1 $f{{[0-9]+}}
+; ALL-LABEL: test_sdc1:
+
+; 32R1-LE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R1-LE-PIC-DAG:    mfc1 $[[R1:[0-9]+]], $f13
+; 32R1-LE-PIC-DAG:    sw $[[R0]], 0(${{[0-9]+}})
+; 32R1-LE-PIC-DAG:    sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R2-LE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R2-LE-PIC-DAG:    mfhc1 $[[R1:[0-9]+]], $f12
+; 32R2-LE-PIC-DAG:    sw $[[R0]], 0(${{[0-9]+}})
+; 32R2-LE-PIC-DAG:    sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R6-LE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R6-LE-PIC-DAG:    mfhc1 $[[R1:[0-9]+]], $f12
+; 32R6-LE-PIC-DAG:    sw $[[R0]], 0(${{[0-9]+}})
+; 32R6-LE-PIC-DAG:    sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R1-LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; 32R1-LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; 32R1-LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
+; 32R1-LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
+; 32R1-LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
+; 32R1-LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
+
+; 32R2-LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; 32R2-LE-STATIC-DAG: mfhc1 $[[R1:[0-9]+]], $f12
+; 32R2-LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
+; 32R2-LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
+; 32R2-LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
+; 32R2-LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
+
+; 32R6-LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; 32R6-LE-STATIC-DAG: mfhc1 $[[R1:[0-9]+]], $f12
+; 32R6-LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
+; 32R6-LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
+; 32R6-LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
+; 32R6-LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
+
+; 32R1-BE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R1-BE-PIC-DAG:    mfc1 $[[R1:[0-9]+]], $f13
+; 32R1-BE-PIC-DAG:    sw $[[R1]], 0(${{[0-9]+}})
+; 32R1-BE-PIC-DAG:    sw $[[R0]], 4(${{[0-9]+}})
+
+; 32R2-BE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R2-BE-PIC-DAG:    mfhc1 $[[R1:[0-9]+]], $f12
+; 32R2-BE-PIC-DAG:    sw $[[R1]], 0(${{[0-9]+}})
+; 32R2-BE-PIC-DAG:    sw $[[R0]], 4(${{[0-9]+}})
+
+; 32R6-BE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R6-BE-PIC-DAG:    mfhc1 $[[R1:[0-9]+]], $f12
+; 32R6-BE-PIC-DAG:    sw $[[R1]], 0(${{[0-9]+}})
+; 32R6-BE-PIC-DAG:    sw $[[R0]], 4(${{[0-9]+}})
+
+; 32R1-LDC1:          sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
+
+; 32R2-LDXC1:         sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
+
+; 32R6-LDC1:          sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
 
 define void @test_sdc1(double %a) {
 entry:
@@ -61,14 +180,35 @@ entry:
   ret void
 }
 
+; ALL-LABEL: test_ldxc1:
+
+; 32R1-LE-DAG:   lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R1-LE-DAG:   lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R1-BE-DAG:   lw $[[R0:[0-9]+]], 4(${{[0-9]+}})
+; 32R1-BE-DAG:   lw $[[R1:[0-9]+]], 0(${{[0-9]+}})
+; 32R1-DAG:      mtc1 $[[R0]], $f0
+; 32R1-DAG:      mtc1 $[[R1]], $f1
+
+; 32R2-LE-DAG:   lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R2-LE-DAG:   lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R2-BE-DAG:   lw $[[R0:[0-9]+]], 4(${{[0-9]+}})
+; 32R2-BE-DAG:   lw $[[R1:[0-9]+]], 0(${{[0-9]+}})
+; 32R2-DAG:      mtc1 $[[R0]], $f0
+; 32R2-DAG:      mthc1 $[[R1]], $f0
 
-; LE-PIC-LABEL: test_ldxc1:
-; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
-; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
-; LE-PIC-DAG: mtc1 $[[R0]], $f0
-; LE-PIC-DAG: mtc1 $[[R1]], $f1
-; CHECK-LDC1-SDC1-LABEL: test_ldxc1:
-; CHECK-LDC1-SDC1: ldxc1 $f{{[0-9]+}}
+; 32R6-LE-DAG:   lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-LE-DAG:   lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-BE-DAG:   lw $[[R0:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-BE-DAG:   lw $[[R1:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-DAG:      mtc1 $[[R0]], $f0
+; 32R6-DAG:      mthc1 $[[R1]], $f0
+
+; 32R1-LDC1:     ldc1 $f0, 0(${{[0-9]+}})
+
+; 32R2-LDXC1:    sll $[[OFFSET:[0-9]+]], $5, 3
+; 32R2-LDXC1:    ldxc1 $f0, $[[OFFSET]]($4)
+
+; 32R6-LDC1:     ldc1 $f0, 0(${{[0-9]+}})
 
 define double @test_ldxc1(double* nocapture readonly %a, i32 %i) {
 entry:
@@ -77,13 +217,29 @@ entry:
   ret double %0
 }
 
-; LE-PIC-LABEL: test_sdxc1:
-; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
-; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
-; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
-; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
-; CHECK-LDC1-SDC1-LABEL: test_sdxc1:
-; CHECK-LDC1-SDC1: sdxc1 $f{{[0-9]+}}
+; ALL-LABEL: test_sdxc1:
+
+; 32R1-DAG:      mfc1 $[[R0:[0-9]+]], $f12
+; 32R1-DAG:      mfc1 $[[R1:[0-9]+]], $f13
+; 32R1-DAG:      sw $[[R0]], 0(${{[0-9]+}})
+; 32R1-DAG:      sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R2-DAG:      mfc1 $[[R0:[0-9]+]], $f12
+; 32R2-DAG:      mfhc1 $[[R1:[0-9]+]], $f12
+; 32R2-DAG:      sw $[[R0]], 0(${{[0-9]+}})
+; 32R2-DAG:      sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R6-DAG:      mfc1 $[[R0:[0-9]+]], $f12
+; 32R6-DAG:      mfhc1 $[[R1:[0-9]+]], $f12
+; 32R6-DAG:      sw $[[R0]], 0(${{[0-9]+}})
+; 32R6-DAG:      sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R1-LDC1:     sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
+
+; 32R2-LDXC1:    sll $[[OFFSET:[0-9]+]], $7, 3
+; 32R2-LDXC1:    sdxc1 $f{{[0-9]+}}, $[[OFFSET]]($6)
+
+; 32R6-LDC1:     sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
 
 define void @test_sdxc1(double %b, double* nocapture %a, i32 %i) {
 entry:
diff --git a/test/CodeGen/Mips/msa/2r_vector_scalar.ll b/test/CodeGen/Mips/msa/2r_vector_scalar.ll
index 6f6e1b9ce2f8f..64e459e4d9a93 100644
--- a/test/CodeGen/Mips/msa/2r_vector_scalar.ll
+++ b/test/CodeGen/Mips/msa/2r_vector_scalar.ll
@@ -1,8 +1,14 @@
 ; Test the MSA intrinsics that are encoded with the 2R instruction format and
 ; convert scalars to vectors.
 
-; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
-; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS32
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS32
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS64
 
 @llvm_mips_fill_b_ARG1 = global i32 23, align 16
 @llvm_mips_fill_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
@@ -17,11 +23,12 @@ entry:
 
 declare <16 x i8> @llvm.mips.fill.b(i32) nounwind
 
-; CHECK: llvm_mips_fill_b_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]],
-; CHECK-DAG: fill.b [[R2:\$w[0-9]+]], [[R1]]
-; CHECK-DAG: st.b [[R2]],
-; CHECK: .size llvm_mips_fill_b_test
+; MIPS-ANY: llvm_mips_fill_b_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]],
+; MIPS64-DAG: ld [[R1:\$[0-9]+]],
+; MIPS-ANY-DAG: fill.b [[R2:\$w[0-9]+]], [[R1]]
+; MIPS-ANY-DAG: st.b [[R2]],
+; MIPS-ANY: .size llvm_mips_fill_b_test
 ;
 @llvm_mips_fill_h_ARG1 = global i32 23, align 16
 @llvm_mips_fill_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
@@ -36,11 +43,12 @@ entry:
 
 declare <8 x i16> @llvm.mips.fill.h(i32) nounwind
 
-; CHECK: llvm_mips_fill_h_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]],
-; CHECK-DAG: fill.h [[R2:\$w[0-9]+]], [[R1]]
-; CHECK-DAG: st.h [[R2]],
-; CHECK: .size llvm_mips_fill_h_test
+; MIPS-ANY: llvm_mips_fill_h_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]],
+; MIPS64-DAG: ld [[R1:\$[0-9]+]],
+; MIPS-ANY-DAG: fill.h [[R2:\$w[0-9]+]], [[R1]]
+; MIPS-ANY-DAG: st.h [[R2]],
+; MIPS-ANY: .size llvm_mips_fill_h_test
 ;
 @llvm_mips_fill_w_ARG1 = global i32 23, align 16
 @llvm_mips_fill_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
@@ -55,11 +63,12 @@ entry:
 
 declare <4 x i32> @llvm.mips.fill.w(i32) nounwind
 
-; CHECK: llvm_mips_fill_w_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]],
-; CHECK-DAG: fill.w [[R2:\$w[0-9]+]], [[R1]]
-; CHECK-DAG: st.w [[R2]],
-; CHECK: .size llvm_mips_fill_w_test
+; MIPS-ANY: llvm_mips_fill_w_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]],
+; MIPS64-DAG: ld [[R1:\$[0-9]+]],
+; MIPS-ANY-DAG: fill.w [[R2:\$w[0-9]+]], [[R1]]
+; MIPS-ANY-DAG: st.w [[R2]],
+; MIPS-ANY: .size llvm_mips_fill_w_test
 ;
 @llvm_mips_fill_d_ARG1 = global i64 23, align 16
 @llvm_mips_fill_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
@@ -74,14 +83,18 @@ entry:
 
 declare <2 x i64> @llvm.mips.fill.d(i64) nounwind
 
-; CHECK: llvm_mips_fill_d_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
-; CHECK-DAG: lw [[R2:\$[0-9]+]], 4(
-; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 0
-; CHECK-DAG: insert.w [[R3]][0], [[R1]]
-; CHECK-DAG: insert.w [[R3]][1], [[R2]]
-; CHECK-DAG: insert.w [[R3]][2], [[R1]]
-; CHECK-DAG: insert.w [[R3]][3], [[R2]]
-; CHECK-DAG: st.w [[R3]],
-; CHECK: .size llvm_mips_fill_d_test
-;
+; MIPS-ANY: llvm_mips_fill_d_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], 0(
+; MIPS32-DAG: lw [[R2:\$[0-9]+]], 4(
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_fill_d_ARG1)
+; MIPS32-DAG: ldi.b [[R3:\$w[0-9]+]], 0
+; MIPS32-DAG: insert.w [[R3]][0], [[R1]]
+; MIPS32-DAG: insert.w [[R3]][1], [[R2]]
+; MIPS32-DAG: insert.w [[R3]][2], [[R1]]
+; MIPS32-DAG: insert.w [[R3]][3], [[R2]]
+; MIPS64-DAG: fill.d [[WD:\$w[0-9]+]], [[R1]]
+; MIPS32-DAG: st.w [[R3]],
+; MIPS64-DAG: ld [[RD:\$[0-9]+]], %got_disp(llvm_mips_fill_d_RES)
+; MIPS64-DAG: st.d [[WD]], 0([[RD]])
+; MIPS-ANY: .size llvm_mips_fill_d_test
+;
+\ No newline at end of file
diff --git a/test/CodeGen/Mips/msa/3r-s.ll b/test/CodeGen/Mips/msa/3r-s.ll
index 30cf265233e51..581c3bfd78afe 100644
--- a/test/CodeGen/Mips/msa/3r-s.ll
+++ b/test/CodeGen/Mips/msa/3r-s.ll
@@ -5,98 +5,114 @@
 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
 
 @llvm_mips_sld_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
-@llvm_mips_sld_b_ARG2 = global i32 10, align 16
+@llvm_mips_sld_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sld_b_ARG3 = global i32 10, align 16
 @llvm_mips_sld_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
 
 define void @llvm_mips_sld_b_test() nounwind {
 entry:
   %0 = load <16 x i8>* @llvm_mips_sld_b_ARG1
-  %1 = load i32* @llvm_mips_sld_b_ARG2
-  %2 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, i32 %1)
-  store <16 x i8> %2, <16 x i8>* @llvm_mips_sld_b_RES
+  %1 = load <16 x i8>* @llvm_mips_sld_b_ARG2
+  %2 = load i32* @llvm_mips_sld_b_ARG3
+  %3 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, <16 x i8> %1, i32 %2)
+  store <16 x i8> %3, <16 x i8>* @llvm_mips_sld_b_RES
   ret void
 }
 
-declare <16 x i8> @llvm.mips.sld.b(<16 x i8>, i32) nounwind
+declare <16 x i8> @llvm.mips.sld.b(<16 x i8>, <16 x i8>, i32) nounwind
 
 ; CHECK: llvm_mips_sld_b_test:
 ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_b_ARG1)
 ; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_b_ARG2)
-; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
-; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]])
-; CHECK-DAG: sld.b [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}}
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_b_ARG3)
+; CHECK-DAG: ld.b [[WD:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]])
+; CHECK-DAG: sld.b [[WD]], [[WS]]{{\[}}[[RT]]{{\]}}
 ; CHECK-DAG: st.b [[WD]]
 ; CHECK: .size llvm_mips_sld_b_test
 ;
 @llvm_mips_sld_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
-@llvm_mips_sld_h_ARG2 = global i32 10, align 16
+@llvm_mips_sld_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sld_h_ARG3 = global i32 10, align 16
 @llvm_mips_sld_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
 
 define void @llvm_mips_sld_h_test() nounwind {
 entry:
   %0 = load <8 x i16>* @llvm_mips_sld_h_ARG1
-  %1 = load i32* @llvm_mips_sld_h_ARG2
-  %2 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, i32 %1)
-  store <8 x i16> %2, <8 x i16>* @llvm_mips_sld_h_RES
+  %1 = load <8 x i16>* @llvm_mips_sld_h_ARG2
+  %2 = load i32* @llvm_mips_sld_h_ARG3
+  %3 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, <8 x i16> %1, i32 %2)
+  store <8 x i16> %3, <8 x i16>* @llvm_mips_sld_h_RES
   ret void
 }
 
-declare <8 x i16> @llvm.mips.sld.h(<8 x i16>, i32) nounwind
+declare <8 x i16> @llvm.mips.sld.h(<8 x i16>, <8 x i16>, i32) nounwind
 
 ; CHECK: llvm_mips_sld_h_test:
 ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_h_ARG1)
-; CHECK-DAG: lw [[RT:\$[0-9]+]], %got(llvm_mips_sld_h_ARG2)
-; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
-; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]])
-; CHECK-DAG: sld.h [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}}
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_h_ARG2)
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_h_ARG3)
+; CHECK-DAG: ld.h [[WD:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]])
+; CHECK-DAG: sld.h [[WD]], [[WS]]{{\[}}[[RT]]{{\]}}
 ; CHECK-DAG: st.h [[WD]]
 ; CHECK: .size llvm_mips_sld_h_test
 ;
 @llvm_mips_sld_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
-@llvm_mips_sld_w_ARG2 = global i32 10, align 16
+@llvm_mips_sld_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sld_w_ARG3 = global i32 10, align 16
 @llvm_mips_sld_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
 
 define void @llvm_mips_sld_w_test() nounwind {
 entry:
   %0 = load <4 x i32>* @llvm_mips_sld_w_ARG1
-  %1 = load i32* @llvm_mips_sld_w_ARG2
-  %2 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, i32 %1)
-  store <4 x i32> %2, <4 x i32>* @llvm_mips_sld_w_RES
+  %1 = load <4 x i32>* @llvm_mips_sld_w_ARG2
+  %2 = load i32* @llvm_mips_sld_w_ARG3
+  %3 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, <4 x i32> %1, i32 %2)
+  store <4 x i32> %3, <4 x i32>* @llvm_mips_sld_w_RES
   ret void
 }
 
-declare <4 x i32> @llvm.mips.sld.w(<4 x i32>, i32) nounwind
+declare <4 x i32> @llvm.mips.sld.w(<4 x i32>, <4 x i32>, i32) nounwind
 
 ; CHECK: llvm_mips_sld_w_test:
 ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_w_ARG1)
-; CHECK-DAG: lw [[RT:\$[0-9]+]], %got(llvm_mips_sld_w_ARG2)
-; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
-; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]])
-; CHECK-DAG: sld.w [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}}
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_w_ARG2)
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_w_ARG3)
+; CHECK-DAG: ld.w [[WD:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]])
+; CHECK-DAG: sld.w [[WD]], [[WS]]{{\[}}[[RT]]{{\]}}
 ; CHECK-DAG: st.w [[WD]]
 ; CHECK: .size llvm_mips_sld_w_test
 ;
 @llvm_mips_sld_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
-@llvm_mips_sld_d_ARG2 = global i32 10, align 16
+@llvm_mips_sld_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sld_d_ARG3 = global i32 10, align 16
 @llvm_mips_sld_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
 
 define void @llvm_mips_sld_d_test() nounwind {
 entry:
   %0 = load <2 x i64>* @llvm_mips_sld_d_ARG1
-  %1 = load i32* @llvm_mips_sld_d_ARG2
-  %2 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, i32 %1)
-  store <2 x i64> %2, <2 x i64>* @llvm_mips_sld_d_RES
+  %1 = load <2 x i64>* @llvm_mips_sld_d_ARG2
+  %2 = load i32* @llvm_mips_sld_d_ARG3
+  %3 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, <2 x i64> %1, i32 %2)
+  store <2 x i64> %3, <2 x i64>* @llvm_mips_sld_d_RES
   ret void
 }
 
-declare <2 x i64> @llvm.mips.sld.d(<2 x i64>, i32) nounwind
+declare <2 x i64> @llvm.mips.sld.d(<2 x i64>, <2 x i64>, i32) nounwind
 
 ; CHECK: llvm_mips_sld_d_test:
 ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_d_ARG1)
-; CHECK-DAG: lw [[RT:\$[0-9]+]], %got(llvm_mips_sld_d_ARG2)
-; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
-; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]])
-; CHECK-DAG: sld.d [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}}
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_d_ARG2)
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_d_ARG3)
+; CHECK-DAG: ld.d [[WD:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]])
+; CHECK-DAG: sld.d [[WD]], [[WS]]{{\[}}[[RT]]{{\]}}
 ; CHECK-DAG: st.d [[WD]]
 ; CHECK: .size llvm_mips_sld_d_test
 ;
diff --git a/test/CodeGen/Mips/msa/arithmetic_float.ll b/test/CodeGen/Mips/msa/arithmetic_float.ll
index dc38721292056..86e57ac85a3b2 100644
--- a/test/CodeGen/Mips/msa/arithmetic_float.ll
+++ b/test/CodeGen/Mips/msa/arithmetic_float.ll
@@ -295,7 +295,8 @@ define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
   %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
-  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo(
+  ; CHECK-DAG: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[G_PTR]])
   ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
   store <2 x double> %3, <2 x double>* %c
   ; CHECK-DAG: st.d [[R4]], 0($4)
diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll
index 0169a0780d36e..dbdf42be49ca4 100644
--- a/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/test/CodeGen/Mips/msa/basic_operations.ll
@@ -6,10 +6,11 @@
 @v8i16 = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
 @v4i32 = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 @v2i64 = global <2 x i64> <i64 0, i64 0>
+@i32 = global i32 0
 @i64 = global i64 0
 
 define void @const_v16i8() nounwind {
-  ; MIPS32-AE: const_v16i8:
+  ; MIPS32-AE-LABEL: const_v16i8:
 
   store volatile <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>*@v16i8
   ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
@@ -18,10 +19,12 @@ define void @const_v16i8() nounwind {
   ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1
 
   store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 31>, <16 x i8>*@v16i8
-  ; MIPS32-AE: ld.b  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.b  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6>, <16 x i8>*@v16i8
-  ; MIPS32-AE: ld.b  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.b  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0>, <16 x i8>*@v16i8
   ; MIPS32-BE: ldi.h [[R1:\$w[0-9]+]], 256
@@ -35,14 +38,15 @@ define void @const_v16i8() nounwind {
   ; MIPS32-AE-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
 
   store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>*@v16i8
-  ; MIPS32-AE: ld.b  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.b  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   ret void
   ; MIPS32-AE: .size const_v16i8
 }
 
 define void @const_v8i16() nounwind {
-  ; MIPS32-AE: const_v8i16:
+  ; MIPS32-AE-LABEL: const_v8i16:
 
   store volatile <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16>*@v8i16
   ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
@@ -51,7 +55,8 @@ define void @const_v8i16() nounwind {
   ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1
 
   store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 2, i16 1, i16 1, i16 1, i16 31>, <8 x i16>*@v8i16
-  ; MIPS32-AE: ld.h  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.h  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>, <8 x i16>*@v8i16
   ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 4
@@ -64,14 +69,15 @@ define void @const_v8i16() nounwind {
   ; MIPS32-AE-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
 
   store volatile <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>, <8 x i16>*@v8i16
-  ; MIPS32-AE: ld.h  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.h  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   ret void
   ; MIPS32-AE: .size const_v8i16
 }
 
 define void @const_v4i32() nounwind {
-  ; MIPS32-AE: const_v4i32:
+  ; MIPS32-AE-LABEL: const_v4i32:
 
   store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>*@v4i32
   ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
@@ -80,7 +86,8 @@ define void @const_v4i32() nounwind {
   ; MIPS32-AE: ldi.w [[R1:\$w[0-9]+]], 1
 
   store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 31>, <4 x i32>*@v4i32
-  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>, <4 x i32>*@v4i32
   ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1
@@ -89,17 +96,19 @@ define void @const_v4i32() nounwind {
   ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1
 
   store volatile <4 x i32> <i32 1, i32 2, i32 1, i32 2>, <4 x i32>*@v4i32
-  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <4 x i32> <i32 3, i32 4, i32 5, i32 6>, <4 x i32>*@v4i32
-  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   ret void
   ; MIPS32-AE: .size const_v4i32
 }
 
 define void @const_v2i64() nounwind {
-  ; MIPS32-AE: const_v2i64:
+  ; MIPS32-AE-LABEL: const_v2i64:
 
   store volatile <2 x i64> <i64 0, i64 0>, <2 x i64>*@v2i64
   ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
@@ -117,17 +126,19 @@ define void @const_v2i64() nounwind {
   ; MIPS32-AE: ldi.d [[R1:\$w[0-9]+]], 1
 
   store volatile <2 x i64> <i64 1, i64 31>, <2 x i64>*@v2i64
-  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <2 x i64> <i64 3, i64 4>, <2 x i64>*@v2i64
-  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   ret void
   ; MIPS32-AE: .size const_v2i64
 }
 
 define void @nonconst_v16i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h) nounwind {
-  ; MIPS32-AE: nonconst_v16i8:
+  ; MIPS32-AE-LABEL: nonconst_v16i8:
 
   %1 = insertelement <16 x i8> undef, i8 %a, i32 0
   %2 = insertelement <16 x i8> %1, i8 %b, i32 1
@@ -177,7 +188,7 @@ define void @nonconst_v16i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8
 }
 
 define void @nonconst_v8i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g, i16 %h) nounwind {
-  ; MIPS32-AE: nonconst_v8i16:
+  ; MIPS32-AE-LABEL: nonconst_v8i16:
 
   %1 = insertelement <8 x i16> undef, i16 %a, i32 0
   %2 = insertelement <8 x i16> %1, i16 %b, i32 1
@@ -211,7 +222,7 @@ define void @nonconst_v8i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16
 }
 
 define void @nonconst_v4i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-  ; MIPS32-AE: nonconst_v4i32:
+  ; MIPS32-AE-LABEL: nonconst_v4i32:
 
   %1 = insertelement <4 x i32> undef, i32 %a, i32 0
   %2 = insertelement <4 x i32> %1, i32 %b, i32 1
@@ -229,7 +240,7 @@ define void @nonconst_v4i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 }
 
 define void @nonconst_v2i64(i64 %a, i64 %b) nounwind {
-  ; MIPS32-AE: nonconst_v2i64:
+  ; MIPS32-AE-LABEL: nonconst_v2i64:
 
   %1 = insertelement <2 x i64> undef, i64 %a, i32 0
   %2 = insertelement <2 x i64> %1, i64 %b, i32 1
@@ -245,7 +256,7 @@ define void @nonconst_v2i64(i64 %a, i64 %b) nounwind {
 }
 
 define i32 @extract_sext_v16i8() nounwind {
-  ; MIPS32-AE: extract_sext_v16i8:
+  ; MIPS32-AE-LABEL: extract_sext_v16i8:
 
   %1 = load <16 x i8>* @v16i8
   ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
@@ -264,7 +275,7 @@ define i32 @extract_sext_v16i8() nounwind {
 }
 
 define i32 @extract_sext_v8i16() nounwind {
-  ; MIPS32-AE: extract_sext_v8i16:
+  ; MIPS32-AE-LABEL: extract_sext_v8i16:
 
   %1 = load <8 x i16>* @v8i16
   ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
@@ -283,7 +294,7 @@ define i32 @extract_sext_v8i16() nounwind {
 }
 
 define i32 @extract_sext_v4i32() nounwind {
-  ; MIPS32-AE: extract_sext_v4i32:
+  ; MIPS32-AE-LABEL: extract_sext_v4i32:
 
   %1 = load <4 x i32>* @v4i32
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -299,7 +310,7 @@ define i32 @extract_sext_v4i32() nounwind {
 }
 
 define i64 @extract_sext_v2i64() nounwind {
-  ; MIPS32-AE: extract_sext_v2i64:
+  ; MIPS32-AE-LABEL: extract_sext_v2i64:
 
   %1 = load <2 x i64>* @v2i64
   ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -318,7 +329,7 @@ define i64 @extract_sext_v2i64() nounwind {
 }
 
 define i32 @extract_zext_v16i8() nounwind {
-  ; MIPS32-AE: extract_zext_v16i8:
+  ; MIPS32-AE-LABEL: extract_zext_v16i8:
 
   %1 = load <16 x i8>* @v16i8
   ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
@@ -336,7 +347,7 @@ define i32 @extract_zext_v16i8() nounwind {
 }
 
 define i32 @extract_zext_v8i16() nounwind {
-  ; MIPS32-AE: extract_zext_v8i16:
+  ; MIPS32-AE-LABEL: extract_zext_v8i16:
 
   %1 = load <8 x i16>* @v8i16
   ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
@@ -354,7 +365,7 @@ define i32 @extract_zext_v8i16() nounwind {
 }
 
 define i32 @extract_zext_v4i32() nounwind {
-  ; MIPS32-AE: extract_zext_v4i32:
+  ; MIPS32-AE-LABEL: extract_zext_v4i32:
 
   %1 = load <4 x i32>* @v4i32
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -370,7 +381,7 @@ define i32 @extract_zext_v4i32() nounwind {
 }
 
 define i64 @extract_zext_v2i64() nounwind {
-  ; MIPS32-AE: extract_zext_v2i64:
+  ; MIPS32-AE-LABEL: extract_zext_v2i64:
 
   %1 = load <2 x i64>* @v2i64
   ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -387,8 +398,200 @@ define i64 @extract_zext_v2i64() nounwind {
   ; MIPS32-AE: .size extract_zext_v2i64
 }
 
+define i32 @extract_sext_v16i8_vidx() nounwind {
+  ; MIPS32-AE-LABEL: extract_sext_v16i8_vidx:
+
+  %1 = load <16 x i8>* @v16i8
+  ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
+  ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = add <16 x i8> %1, %1
+  ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <16 x i8> %2, i32 %3
+  %5 = sext i8 %4 to i32
+  ; MIPS32-AE-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+  ; MIPS32-AE-DAG: sra [[R6:\$[0-9]+]], [[R5]], 24
+
+  ret i32 %5
+  ; MIPS32-AE: .size extract_sext_v16i8_vidx
+}
+
+define i32 @extract_sext_v8i16_vidx() nounwind {
+  ; MIPS32-AE-LABEL: extract_sext_v8i16_vidx:
+
+  %1 = load <8 x i16>* @v8i16
+  ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
+  ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = add <8 x i16> %1, %1
+  ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <8 x i16> %2, i32 %3
+  %5 = sext i16 %4 to i32
+  ; MIPS32-AE-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+  ; MIPS32-AE-DAG: sra [[R6:\$[0-9]+]], [[R5]], 16
+
+  ret i32 %5
+  ; MIPS32-AE: .size extract_sext_v8i16_vidx
+}
+
+define i32 @extract_sext_v4i32_vidx() nounwind {
+  ; MIPS32-AE-LABEL: extract_sext_v4i32_vidx:
+
+  %1 = load <4 x i32>* @v4i32
+  ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
+  ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = add <4 x i32> %1, %1
+  ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <4 x i32> %2, i32 %3
+  ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+  ; MIPS32-AE-NOT: sra
+
+  ret i32 %4
+  ; MIPS32-AE: .size extract_sext_v4i32_vidx
+}
+
+define i64 @extract_sext_v2i64_vidx() nounwind {
+  ; MIPS32-AE-LABEL: extract_sext_v2i64_vidx:
+
+  %1 = load <2 x i64>* @v2i64
+  ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
+  ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = add <2 x i64> %1, %1
+  ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <2 x i64> %2, i32 %3
+  ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+  ; MIPS32-AE-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
+  ; MIPS32-AE-NOT: sra
+
+  ret i64 %4
+  ; MIPS32-AE: .size extract_sext_v2i64_vidx
+}
+
+define i32 @extract_zext_v16i8_vidx() nounwind {
+  ; MIPS32-AE-LABEL: extract_zext_v16i8_vidx:
+
+  %1 = load <16 x i8>* @v16i8
+  ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
+  ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = add <16 x i8> %1, %1
+  ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <16 x i8> %2, i32 %3
+  %5 = zext i8 %4 to i32
+  ; MIPS32-AE-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+  ; MIPS32-AE-DAG: srl [[R6:\$[0-9]+]], [[R5]], 24
+
+  ret i32 %5
+  ; MIPS32-AE: .size extract_zext_v16i8_vidx
+}
+
+define i32 @extract_zext_v8i16_vidx() nounwind {
+  ; MIPS32-AE-LABEL: extract_zext_v8i16_vidx:
+
+  %1 = load <8 x i16>* @v8i16
+  ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
+  ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = add <8 x i16> %1, %1
+  ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <8 x i16> %2, i32 %3
+  %5 = zext i16 %4 to i32
+  ; MIPS32-AE-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+  ; MIPS32-AE-DAG: srl [[R6:\$[0-9]+]], [[R5]], 16
+
+  ret i32 %5
+  ; MIPS32-AE: .size extract_zext_v8i16_vidx
+}
+
+define i32 @extract_zext_v4i32_vidx() nounwind {
+  ; MIPS32-AE-LABEL: extract_zext_v4i32_vidx:
+
+  %1 = load <4 x i32>* @v4i32
+  ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
+  ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = add <4 x i32> %1, %1
+  ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <4 x i32> %2, i32 %3
+  ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+  ; MIPS32-AE-NOT: srl
+
+  ret i32 %4
+  ; MIPS32-AE: .size extract_zext_v4i32_vidx
+}
+
+define i64 @extract_zext_v2i64_vidx() nounwind {
+  ; MIPS32-AE-LABEL: extract_zext_v2i64_vidx:
+
+  %1 = load <2 x i64>* @v2i64
+  ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
+  ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = add <2 x i64> %1, %1
+  ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <2 x i64> %2, i32 %3
+  ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+  ; MIPS32-AE-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
+  ; MIPS32-AE-NOT: srl
+
+  ret i64 %4
+  ; MIPS32-AE: .size extract_zext_v2i64_vidx
+}
+
 define void @insert_v16i8(i32 %a) nounwind {
-  ; MIPS32-AE: insert_v16i8:
+  ; MIPS32-AE-LABEL: insert_v16i8:
 
   %1 = load <16 x i8>* @v16i8
   ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
@@ -410,7 +613,7 @@ define void @insert_v16i8(i32 %a) nounwind {
 }
 
 define void @insert_v8i16(i32 %a) nounwind {
-  ; MIPS32-AE: insert_v8i16:
+  ; MIPS32-AE-LABEL: insert_v8i16:
 
   %1 = load <8 x i16>* @v8i16
   ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
@@ -432,7 +635,7 @@ define void @insert_v8i16(i32 %a) nounwind {
 }
 
 define void @insert_v4i32(i32 %a) nounwind {
-  ; MIPS32-AE: insert_v4i32:
+  ; MIPS32-AE-LABEL: insert_v4i32:
 
   %1 = load <4 x i32>* @v4i32
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -451,7 +654,7 @@ define void @insert_v4i32(i32 %a) nounwind {
 }
 
 define void @insert_v2i64(i64 %a) nounwind {
-  ; MIPS32-AE: insert_v2i64:
+  ; MIPS32-AE-LABEL: insert_v2i64:
 
   %1 = load <2 x i64>* @v2i64
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -470,8 +673,131 @@ define void @insert_v2i64(i64 %a) nounwind {
   ; MIPS32-AE: .size insert_v2i64
 }
 
+define void @insert_v16i8_vidx(i32 %a) nounwind {
+  ; MIPS32-AE: insert_v16i8_vidx:
+
+  %1 = load <16 x i8>* @v16i8
+  ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
+
+  %2 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %a2 = trunc i32 %a to i8
+  %a3 = sext i8 %a2 to i32
+  %a4 = trunc i32 %a3 to i8
+  ; MIPS32-AE-NOT: andi
+  ; MIPS32-AE-NOT: sra
+
+  %3 = insertelement <16 x i8> %1, i8 %a4, i32 %2
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[IDX]]]
+  ; MIPS32-AE-DAG: insert.b [[R1]][0], $4
+  ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]]
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+  store <16 x i8> %3, <16 x i8>* @v16i8
+  ; MIPS32-AE-DAG: st.b [[R1]]
+
+  ret void
+  ; MIPS32-AE: .size insert_v16i8_vidx
+}
+
+define void @insert_v8i16_vidx(i32 %a) nounwind {
+  ; MIPS32-AE: insert_v8i16_vidx:
+
+  %1 = load <8 x i16>* @v8i16
+  ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
+
+  %2 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %a2 = trunc i32 %a to i16
+  %a3 = sext i16 %a2 to i32
+  %a4 = trunc i32 %a3 to i16
+  ; MIPS32-AE-NOT: andi
+  ; MIPS32-AE-NOT: sra
+
+  %3 = insertelement <8 x i16> %1, i16 %a4, i32 %2
+  ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 1
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+  ; MIPS32-AE-DAG: insert.h [[R1]][0], $4
+  ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+  store <8 x i16> %3, <8 x i16>* @v8i16
+  ; MIPS32-AE-DAG: st.h [[R1]]
+
+  ret void
+  ; MIPS32-AE: .size insert_v8i16_vidx
+}
+
+define void @insert_v4i32_vidx(i32 %a) nounwind {
+  ; MIPS32-AE: insert_v4i32_vidx:
+
+  %1 = load <4 x i32>* @v4i32
+  ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  %2 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  ; MIPS32-AE-NOT: andi
+  ; MIPS32-AE-NOT: sra
+
+  %3 = insertelement <4 x i32> %1, i32 %a, i32 %2
+  ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+  ; MIPS32-AE-DAG: insert.w [[R1]][0], $4
+  ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+  store <4 x i32> %3, <4 x i32>* @v4i32
+  ; MIPS32-AE-DAG: st.w [[R1]]
+
+  ret void
+  ; MIPS32-AE: .size insert_v4i32_vidx
+}
+
+define void @insert_v2i64_vidx(i64 %a) nounwind {
+  ; MIPS32-AE: insert_v2i64_vidx:
+
+  %1 = load <2 x i64>* @v2i64
+  ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  %2 = load i32* @i32
+  ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  ; MIPS32-AE-NOT: andi
+  ; MIPS32-AE-NOT: sra
+
+  %3 = insertelement <2 x i64> %1, i64 %a, i32 %2
+  ; TODO: This code could be a lot better but it works. The legalizer splits
+  ; 64-bit inserts into two 32-bit inserts because there is no i64 type on
+  ; MIPS32. The obvious optimisation is to perform both insert.w's at once while
+  ; the vector is rotated.
+  ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+  ; MIPS32-AE-DAG: insert.w [[R1]][0], $4
+  ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+  ; MIPS32-AE-DAG: addiu [[IDX2:\$[0-9]+]], [[IDX]], 1
+  ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX2]], 2
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+  ; MIPS32-AE-DAG: insert.w [[R1]][0], $5
+  ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+  ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+  store <2 x i64> %3, <2 x i64>* @v2i64
+  ; MIPS32-AE-DAG: st.w [[R1]]
+
+  ret void
+  ; MIPS32-AE: .size insert_v2i64_vidx
+}
+
 define void @truncstore() nounwind {
-  ; MIPS32-AE: truncstore:
+  ; MIPS32-AE-LABEL: truncstore:
 
   store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8>*@v4i8
   ; TODO: What code should be emitted?
diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll
index 1f538108a1fae..a0c9d29e231ae 100644
--- a/test/CodeGen/Mips/msa/basic_operations_float.ll
+++ b/test/CodeGen/Mips/msa/basic_operations_float.ll
@@ -3,11 +3,12 @@
 
 @v4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
 @v2f64 = global <2 x double> <double 0.0, double 0.0>
+@i32 = global i32 0
 @f32 = global float 0.0
 @f64 = global double 0.0
 
 define void @const_v4f32() nounwind {
-  ; MIPS32: const_v4f32:
+  ; MIPS32-LABEL: const_v4f32:
 
   store volatile <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>*@v4f32
   ; MIPS32: ldi.b  [[R1:\$w[0-9]+]], 0
@@ -17,7 +18,8 @@ define void @const_v4f32() nounwind {
   ; MIPS32: fill.w  [[R2:\$w[0-9]+]], [[R1]]
 
   store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 31.0>, <4 x float>*@v4f32
-  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <4 x float> <float 65537.0, float 65537.0, float 65537.0, float 65537.0>, <4 x float>*@v4f32
   ; MIPS32: lui     [[R1:\$[0-9]+]], 18304
@@ -25,45 +27,53 @@ define void @const_v4f32() nounwind {
   ; MIPS32: fill.w  [[R3:\$w[0-9]+]], [[R2]]
 
   store volatile <4 x float> <float 1.0, float 2.0, float 1.0, float 2.0>, <4 x float>*@v4f32
-  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <4 x float> <float 3.0, float 4.0, float 5.0, float 6.0>, <4 x float>*@v4f32
-  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   ret void
   ; MIPS32: .size const_v4f32
 }
 
 define void @const_v2f64() nounwind {
-  ; MIPS32: const_v2f64:
+  ; MIPS32-LABEL: const_v2f64:
 
   store volatile <2 x double> <double 0.0, double 0.0>, <2 x double>*@v2f64
   ; MIPS32: ldi.b  [[R1:\$w[0-9]+]], 0
 
   store volatile <2 x double> <double 72340172838076673.0, double 72340172838076673.0>, <2 x double>*@v2f64
-  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <2 x double> <double 281479271743489.0, double 281479271743489.0>, <2 x double>*@v2f64
-  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <2 x double> <double 4294967297.0, double 4294967297.0>, <2 x double>*@v2f64
-  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <2 x double> <double 1.0, double 1.0>, <2 x double>*@v2f64
-  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <2 x double> <double 1.0, double 31.0>, <2 x double>*@v2f64
-  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   store volatile <2 x double> <double 3.0, double 4.0>, <2 x double>*@v2f64
-  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+  ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
 
   ret void
   ; MIPS32: .size const_v2f64
 }
 
 define void @nonconst_v4f32() nounwind {
-  ; MIPS32: nonconst_v4f32:
+  ; MIPS32-LABEL: nonconst_v4f32:
 
   %1 = load float *@f32
   %2 = insertelement <4 x float> undef, float %1, i32 0
@@ -79,7 +89,7 @@ define void @nonconst_v4f32() nounwind {
 }
 
 define void @nonconst_v2f64() nounwind {
-  ; MIPS32: nonconst_v2f64:
+  ; MIPS32-LABEL: nonconst_v2f64:
 
   %1 = load double *@f64
   %2 = insertelement <2 x double> undef, double %1, i32 0
@@ -93,7 +103,7 @@ define void @nonconst_v2f64() nounwind {
 }
 
 define float @extract_v4f32() nounwind {
-  ; MIPS32: extract_v4f32:
+  ; MIPS32-LABEL: extract_v4f32:
 
   %1 = load <4 x float>* @v4f32
   ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -111,7 +121,7 @@ define float @extract_v4f32() nounwind {
 }
 
 define float @extract_v4f32_elt0() nounwind {
-  ; MIPS32: extract_v4f32_elt0:
+  ; MIPS32-LABEL: extract_v4f32_elt0:
 
   %1 = load <4 x float>* @v4f32
   ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -128,8 +138,47 @@ define float @extract_v4f32_elt0() nounwind {
   ; MIPS32: .size extract_v4f32_elt0
 }
 
+define float @extract_v4f32_elt2() nounwind {
+  ; MIPS32-LABEL: extract_v4f32_elt2:
+
+  %1 = load <4 x float>* @v4f32
+  ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  %2 = fadd <4 x float> %1, %1
+  ; MIPS32-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <4 x float> %2, i32 2
+  ; Element 2 can be obtained by splatting it across the vector and extracting
+  ; $w0:sub_lo
+  ; MIPS32-DAG: splati.w $w0, [[R1]][2]
+
+  ret float %3
+  ; MIPS32: .size extract_v4f32_elt2
+}
+
+define float @extract_v4f32_vidx() nounwind {
+  ; MIPS32-LABEL: extract_v4f32_vidx:
+
+  %1 = load <4 x float>* @v4f32
+  ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
+  ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = fadd <4 x float> %1, %1
+  ; MIPS32-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <4 x float> %2, i32 %3
+  ; MIPS32-DAG: splat.w $w0, [[R1]]{{\[}}[[IDX]]]
+
+  ret float %4
+  ; MIPS32: .size extract_v4f32_vidx
+}
+
 define double @extract_v2f64() nounwind {
-  ; MIPS32: extract_v2f64:
+  ; MIPS32-LABEL: extract_v2f64:
 
   %1 = load <2 x double>* @v2f64
   ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -152,7 +201,7 @@ define double @extract_v2f64() nounwind {
 }
 
 define double @extract_v2f64_elt0() nounwind {
-  ; MIPS32: extract_v2f64_elt0:
+  ; MIPS32-LABEL: extract_v2f64_elt0:
 
   %1 = load <2 x double>* @v2f64
   ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -172,8 +221,29 @@ define double @extract_v2f64_elt0() nounwind {
   ; MIPS32: .size extract_v2f64_elt0
 }
 
+define double @extract_v2f64_vidx() nounwind {
+  ; MIPS32-LABEL: extract_v2f64_vidx:
+
+  %1 = load <2 x double>* @v2f64
+  ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
+  ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = fadd <2 x double> %1, %1
+  ; MIPS32-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = load i32* @i32
+  ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %4 = extractelement <2 x double> %2, i32 %3
+  ; MIPS32-DAG: splat.d $w0, [[R1]]{{\[}}[[IDX]]]
+
+  ret double %4
+  ; MIPS32: .size extract_v2f64_vidx
+}
+
 define void @insert_v4f32(float %a) nounwind {
-  ; MIPS32: insert_v4f32:
+  ; MIPS32-LABEL: insert_v4f32:
 
   %1 = load <4 x float>* @v4f32
   ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -190,7 +260,7 @@ define void @insert_v4f32(float %a) nounwind {
 }
 
 define void @insert_v2f64(double %a) nounwind {
-  ; MIPS32: insert_v2f64:
+  ; MIPS32-LABEL: insert_v2f64:
 
   %1 = load <2 x double>* @v2f64
   ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -205,3 +275,55 @@ define void @insert_v2f64(double %a) nounwind {
   ret void
   ; MIPS32: .size insert_v2f64
 }
+
+define void @insert_v4f32_vidx(float %a) nounwind {
+  ; MIPS32-LABEL: insert_v4f32_vidx:
+
+  %1 = load <4 x float>* @v4f32
+  ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
+  ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = load i32* @i32
+  ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %3 = insertelement <4 x float> %1, float %a, i32 %2
+  ; float argument passed in $f12
+  ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+  ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+  ; MIPS32-DAG: insve.w [[R1]][0], $w12[0]
+  ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+  ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+  store <4 x float> %3, <4 x float>* @v4f32
+  ; MIPS32-DAG: st.w [[R1]]
+
+  ret void
+  ; MIPS32: .size insert_v4f32_vidx
+}
+
+define void @insert_v2f64_vidx(double %a) nounwind {
+  ; MIPS32-LABEL: insert_v2f64_vidx:
+
+  %1 = load <2 x double>* @v2f64
+  ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
+  ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+  %2 = load i32* @i32
+  ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+  ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+  %3 = insertelement <2 x double> %1, double %a, i32 %2
+  ; double argument passed in $f12
+  ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
+  ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+  ; MIPS32-DAG: insve.d [[R1]][0], $w12[0]
+  ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+  ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+  store <2 x double> %3, <2 x double>* @v2f64
+  ; MIPS32-DAG: st.d [[R1]]
+
+  ret void
+  ; MIPS32: .size insert_v2f64_vidx
+}
diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll
index 9a88c47b7e1f2..5d57198a93558 100644
--- a/test/CodeGen/Mips/msa/bitwise.ll
+++ b/test/CodeGen/Mips/msa/bitwise.ll
@@ -990,9 +990,10 @@ define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>*
   %6 = and <16 x i8> %2, %4
   %7 = or <16 x i8> %5, %6
   ; bmnz is the same operation
-  ; CHECK-DAG: bmnz.v [[R1]], [[R2]], [[R3]]
+  ; (vselect Mask, IfSet, IfClr) -> (BMNZ IfClr, IfSet, Mask)
+  ; CHECK-DAG: bmnz.v [[R2]], [[R1]], [[R3]]
   store <16 x i8> %7, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R1]], 0($4)
+  ; CHECK-DAG: st.b [[R2]], 0($4)
 
   ret void
   ; CHECK: .size bsel_v16i8
diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll
index 6408d7ba09f43..87ca1482da81f 100644
--- a/test/CodeGen/Mips/msa/compare.ll
+++ b/test/CodeGen/Mips/msa/compare.ll
@@ -761,7 +761,8 @@ define void @bsel_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
   %4 = icmp sgt <8 x i16> %1, %2
   ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
   store <8 x i16> %5, <8 x i16>* %d
   ; CHECK-DAG: st.h [[R4]], 0($4)
 
@@ -782,7 +783,8 @@ define void @bsel_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
   %4 = icmp sgt <4 x i32> %1, %2
   ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
   store <4 x i32> %5, <4 x i32>* %d
   ; CHECK-DAG: st.w [[R4]], 0($4)
 
@@ -803,7 +805,8 @@ define void @bsel_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
   %4 = icmp sgt <2 x i64> %1, %2
   ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
   store <2 x i64> %5, <2 x i64>* %d
   ; CHECK-DAG: st.d [[R4]], 0($4)
 
@@ -846,7 +849,8 @@ define void @bsel_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
   %4 = icmp ugt <8 x i16> %1, %2
   ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
   store <8 x i16> %5, <8 x i16>* %d
   ; CHECK-DAG: st.h [[R4]], 0($4)
 
@@ -867,7 +871,8 @@ define void @bsel_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
   %4 = icmp ugt <4 x i32> %1, %2
   ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
   store <4 x i32> %5, <4 x i32>* %d
   ; CHECK-DAG: st.w [[R4]], 0($4)
 
@@ -888,7 +893,8 @@ define void @bsel_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
   %4 = icmp ugt <2 x i64> %1, %2
   ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
   store <2 x i64> %5, <2 x i64>* %d
   ; CHECK-DAG: st.d [[R4]], 0($4)
 
@@ -906,7 +912,7 @@ define void @bseli_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <16 x i8> %1, %2
   ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
-  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %4 = select <16 x i1> %3, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %1
   ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1
   store <16 x i8> %4, <16 x i8>* %d
   ; CHECK-DAG: st.b [[R4]], 0($4)
@@ -925,7 +931,7 @@ define void @bseli_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <8 x i16> %1, %2
   ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
-  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %4 = select <8 x i1> %3, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %1
   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
   store <8 x i16> %4, <8 x i16>* %d
@@ -945,7 +951,7 @@ define void @bseli_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <4 x i32> %1, %2
   ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
-  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %4 = select <4 x i1> %3, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> %1
   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
   store <4 x i32> %4, <4 x i32>* %d
@@ -965,7 +971,7 @@ define void @bseli_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <2 x i64> %1, %2
   ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
-  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  %4 = select <2 x i1> %3, <2 x i64> <i64 1, i64 1>, <2 x i64> %1
   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
   store <2 x i64> %4, <2 x i64>* %d
@@ -985,7 +991,7 @@ define void @bseli_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <16 x i8> %1, %2
   ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
-  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %4 = select <16 x i1> %3, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %1
   ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1
   store <16 x i8> %4, <16 x i8>* %d
   ; CHECK-DAG: st.b [[R4]], 0($4)
@@ -1004,7 +1010,7 @@ define void @bseli_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <8 x i16> %1, %2
   ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
-  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %4 = select <8 x i1> %3, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %1
   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
   store <8 x i16> %4, <8 x i16>* %d
@@ -1024,7 +1030,7 @@ define void @bseli_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <4 x i32> %1, %2
   ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
-  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %4 = select <4 x i1> %3, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> %1
   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
   store <4 x i32> %4, <4 x i32>* %d
@@ -1044,7 +1050,7 @@ define void @bseli_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <2 x i64> %1, %2
   ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
-  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  %4 = select <2 x i1> %3, <2 x i64> <i64 1, i64 1>, <2 x i64> %1
   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
   store <2 x i64> %4, <2 x i64>* %d
diff --git a/test/CodeGen/Mips/msa/compare_float.ll b/test/CodeGen/Mips/msa/compare_float.ll
index 2fc61f89c7fa7..e93221b93612f 100644
--- a/test/CodeGen/Mips/msa/compare_float.ll
+++ b/test/CodeGen/Mips/msa/compare_float.ll
@@ -32,12 +32,9 @@ define void @false_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) noun
   store <2 x i64> %4, <2 x i64>* %c
   ret void
 
-  ; FIXME: This code is correct, but poor. Ideally it would be similar to
-  ;        the code in @false_v4f32
+  ; (setcc $a, $b, SETFALSE) is always folded
   ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0
-  ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63
-  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63
-  ; CHECK-DAG: st.d [[R4]], 0($4)
+  ; CHECK-DAG: st.w [[R1]], 0($4)
   ; CHECK: .size false_v2f64
 }
 
@@ -509,12 +506,9 @@ define void @true_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounw
   store <2 x i64> %4, <2 x i64>* %c
   ret void
 
-  ; FIXME: This code is correct, but poor. Ideally it would be similar to
-  ;        the code in @true_v4f32
-  ; CHECK-DAG: ldi.d [[R1:\$w[0-9]+]], 1
-  ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63
-  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63
-  ; CHECK-DAG: st.d [[R4]], 0($4)
+  ; (setcc $a, $b, SETTRUE) is always folded.
+  ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], -1
+  ; CHECK-DAG: st.w [[R1]], 0($4)
   ; CHECK: .size true_v2f64
 }
 
@@ -531,7 +525,8 @@ define void @bsel_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
   %4 = fcmp ogt <4 x float> %1, %2
   ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %5 = select <4 x i1> %4, <4 x float> %1, <4 x float> %3
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
   store <4 x float> %5, <4 x float>* %d
   ; CHECK-DAG: st.w [[R4]], 0($4)
 
@@ -552,7 +547,8 @@ define void @bsel_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
   %4 = fcmp ogt <2 x double> %1, %2
   ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %5 = select <2 x i1> %4, <2 x double> %1, <2 x double> %3
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
   store <2 x double> %5, <2 x double>* %d
   ; CHECK-DAG: st.d [[R4]], 0($4)
 
@@ -571,7 +567,8 @@ define void @bseli_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
   %3 = fcmp ogt <4 x float> %1, %2
   ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %4 = select <4 x i1> %3, <4 x float> %1, <4 x float> zeroinitializer
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3:\$w[0-9]+]], [[R1]]
   store <4 x float> %4, <4 x float>* %d
   ; CHECK-DAG: st.w [[R4]], 0($4)
 
@@ -590,7 +587,8 @@ define void @bseli_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
   %3 = fcmp ogt <2 x double> %1, %2
   ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   %4 = select <2 x i1> %3, <2 x double> %1, <2 x double> zeroinitializer
-  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]]
+  ; Note that IfSet and IfClr are swapped since the condition is inverted
+  ; CHECK-DAG: bsel.v [[R4]], [[R3:\$w[0-9]+]], [[R1]]
   store <2 x double> %4, <2 x double>* %d
   ; CHECK-DAG: st.d [[R4]], 0($4)
 
diff --git a/test/CodeGen/Mips/msa/elm_copy.ll b/test/CodeGen/Mips/msa/elm_copy.ll
index ed3e52cbffc29..0dd75fa3db12a 100644
--- a/test/CodeGen/Mips/msa/elm_copy.ll
+++ b/test/CodeGen/Mips/msa/elm_copy.ll
@@ -1,8 +1,14 @@
 ; Test the MSA intrinsics that are encoded with the ELM instruction format and
 ; are element extraction operations.
 
-; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
-; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS32
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS32
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS64
 
 @llvm_mips_copy_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_copy_s_b_RES  = global i32 0, align 16
@@ -17,11 +23,15 @@ entry:
 
 declare i32 @llvm.mips.copy.s.b(<16 x i8>, i32) nounwind
 
-; CHECK: llvm_mips_copy_s_b_test:
-; CHECK: ld.b
-; CHECK: copy_s.b
-; CHECK: sw
-; CHECK: .size llvm_mips_copy_s_b_test
+; MIPS-ANY: llvm_mips_copy_s_b_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_s_b_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_s_b_ARG1)
+; MIPS-ANY-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: copy_s.b [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_s_b_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_s_b_RES)
+; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
+; MIPS-ANY: .size llvm_mips_copy_s_b_test
 ;
 @llvm_mips_copy_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
 @llvm_mips_copy_s_h_RES  = global i32 0, align 16
@@ -36,11 +46,15 @@ entry:
 
 declare i32 @llvm.mips.copy.s.h(<8 x i16>, i32) nounwind
 
-; CHECK: llvm_mips_copy_s_h_test:
-; CHECK: ld.h
-; CHECK: copy_s.h
-; CHECK: sw
-; CHECK: .size llvm_mips_copy_s_h_test
+; MIPS-ANY: llvm_mips_copy_s_h_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_s_h_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_s_h_ARG1)
+; MIPS-ANY-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: copy_s.h [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_s_h_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_s_h_RES)
+; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
+; MIPS-ANY: .size llvm_mips_copy_s_h_test
 ;
 @llvm_mips_copy_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
 @llvm_mips_copy_s_w_RES  = global i32 0, align 16
@@ -55,11 +69,15 @@ entry:
 
 declare i32 @llvm.mips.copy.s.w(<4 x i32>, i32) nounwind
 
-; CHECK: llvm_mips_copy_s_w_test:
-; CHECK: ld.w
-; CHECK: copy_s.w
-; CHECK: sw
-; CHECK: .size llvm_mips_copy_s_w_test
+; MIPS-ANY: llvm_mips_copy_s_w_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_s_w_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_s_w_ARG1)
+; MIPS-ANY-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: copy_s.w [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_s_w_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_s_w_RES)
+; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
+; MIPS-ANY: .size llvm_mips_copy_s_w_test
 ;
 @llvm_mips_copy_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
 @llvm_mips_copy_s_d_RES  = global i64 0, align 16
@@ -74,13 +92,20 @@ entry:
 
 declare i64 @llvm.mips.copy.s.d(<2 x i64>, i32) nounwind
 
-; CHECK: llvm_mips_copy_s_d_test:
-; CHECK: ld.w
-; CHECK: copy_s.w
-; CHECK: copy_s.w
-; CHECK: sw
-; CHECK: sw
-; CHECK: .size llvm_mips_copy_s_d_test
+; MIPS-ANY: llvm_mips_copy_s_d_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_s_d_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_s_d_ARG1)
+; MIPS32-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS64-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS32-DAG: copy_s.w [[RD1:\$[0-9]+]], [[WS]][2]
+; MIPS32-DAG: copy_s.w [[RD2:\$[0-9]+]], [[WS]][3]
+; MIPS64-DAG: copy_s.d [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_s_d_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_s_d_RES)
+; MIPS32-DAG: sw [[RD1]], 0([[RES]])
+; MIPS32-DAG: sw [[RD2]], 4([[RES]])
+; MIPS64-DAG: sd [[RD]], 0([[RES]])
+; MIPS-ANY: .size llvm_mips_copy_s_d_test
 ;
 @llvm_mips_copy_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_copy_u_b_RES  = global i32 0, align 16
@@ -95,11 +120,15 @@ entry:
 
 declare i32 @llvm.mips.copy.u.b(<16 x i8>, i32) nounwind
 
-; CHECK: llvm_mips_copy_u_b_test:
-; CHECK: ld.b
-; CHECK: copy_u.b
-; CHECK: sw
-; CHECK: .size llvm_mips_copy_u_b_test
+; MIPS-ANY: llvm_mips_copy_u_b_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_u_b_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_u_b_ARG1)
+; MIPS-ANY-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: copy_u.b [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_b_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_b_RES)
+; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
+; MIPS-ANY: .size llvm_mips_copy_u_b_test
 ;
 @llvm_mips_copy_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
 @llvm_mips_copy_u_h_RES  = global i32 0, align 16
@@ -114,11 +143,15 @@ entry:
 
 declare i32 @llvm.mips.copy.u.h(<8 x i16>, i32) nounwind
 
-; CHECK: llvm_mips_copy_u_h_test:
-; CHECK: ld.h
-; CHECK: copy_u.h
-; CHECK: sw
-; CHECK: .size llvm_mips_copy_u_h_test
+; MIPS-ANY: llvm_mips_copy_u_h_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_u_h_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_u_h_ARG1)
+; MIPS-ANY-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: copy_u.h [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_h_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_h_RES)
+; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
+; MIPS-ANY: .size llvm_mips_copy_u_h_test
 ;
 @llvm_mips_copy_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
 @llvm_mips_copy_u_w_RES  = global i32 0, align 16
@@ -133,11 +166,15 @@ entry:
 
 declare i32 @llvm.mips.copy.u.w(<4 x i32>, i32) nounwind
 
-; CHECK: llvm_mips_copy_u_w_test:
-; CHECK: ld.w
-; CHECK: copy_u.w
-; CHECK: sw
-; CHECK: .size llvm_mips_copy_u_w_test
+; MIPS-ANY: llvm_mips_copy_u_w_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_u_w_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_u_w_ARG1)
+; MIPS-ANY-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: copy_u.w [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_w_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_w_RES)
+; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
+; MIPS-ANY: .size llvm_mips_copy_u_w_test
 ;
 @llvm_mips_copy_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
 @llvm_mips_copy_u_d_RES  = global i64 0, align 16
@@ -152,11 +189,18 @@ entry:
 
 declare i64 @llvm.mips.copy.u.d(<2 x i64>, i32) nounwind
 
-; CHECK: llvm_mips_copy_u_d_test:
-; CHECK: ld.w
-; CHECK: copy_s.w
-; CHECK: copy_s.w
-; CHECK: sw
-; CHECK: sw
-; CHECK: .size llvm_mips_copy_u_d_test
+; MIPS-ANY: llvm_mips_copy_u_d_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_u_d_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_u_d_ARG1)
+; MIPS32-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS64-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS32-DAG: copy_s.w [[RD1:\$[0-9]+]], [[WS]][2]
+; MIPS32-DAG: copy_s.w [[RD2:\$[0-9]+]], [[WS]][3]
+; MIPS64-DAG: copy_u.d [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_d_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_d_RES)
+; MIPS32-DAG: sw [[RD1]], 0([[RES]])
+; MIPS32-DAG: sw [[RD2]], 4([[RES]])
+; MIPS64-DAG: sd [[RD]], 0([[RES]])
+; MIPS-ANY: .size llvm_mips_copy_u_d_test
 ;
diff --git a/test/CodeGen/Mips/msa/elm_insv.ll b/test/CodeGen/Mips/msa/elm_insv.ll
index fa7ceaf0c6bf3..c746e523def6d 100644
--- a/test/CodeGen/Mips/msa/elm_insv.ll
+++ b/test/CodeGen/Mips/msa/elm_insv.ll
@@ -1,8 +1,14 @@
 ; Test the MSA element insertion intrinsics that are encoded with the ELM
 ; instruction format.
 
-; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
-; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS32
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS32
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS64
 
 @llvm_mips_insert_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_insert_b_ARG3 = global i32 27, align 16
@@ -19,12 +25,12 @@ entry:
 
 declare <16 x i8> @llvm.mips.insert.b(<16 x i8>, i32, i32) nounwind
 
-; CHECK: llvm_mips_insert_b_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
-; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0(
-; CHECK-DAG: insert.b [[R2]][1], [[R1]]
-; CHECK-DAG: st.b [[R2]], 0(
-; CHECK: .size llvm_mips_insert_b_test
+; MIPS-ANY: llvm_mips_insert_b_test:
+; MIPS-ANY-DAG: lw [[R1:\$[0-9]+]], 0(
+; MIPS-ANY-DAG: ld.b [[R2:\$w[0-9]+]], 0(
+; MIPS-ANY-DAG: insert.b [[R2]][1], [[R1]]
+; MIPS-ANY-DAG: st.b [[R2]], 0(
+; MIPS-ANY: .size llvm_mips_insert_b_test
 ;
 @llvm_mips_insert_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
 @llvm_mips_insert_h_ARG3 = global i32 27, align 16
@@ -41,12 +47,12 @@ entry:
 
 declare <8 x i16> @llvm.mips.insert.h(<8 x i16>, i32, i32) nounwind
 
-; CHECK: llvm_mips_insert_h_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
-; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0(
-; CHECK-DAG: insert.h [[R2]][1], [[R1]]
-; CHECK-DAG: st.h [[R2]], 0(
-; CHECK: .size llvm_mips_insert_h_test
+; MIPS-ANY: llvm_mips_insert_h_test:
+; MIPS-ANY-DAG: lw [[R1:\$[0-9]+]], 0(
+; MIPS-ANY-DAG: ld.h [[R2:\$w[0-9]+]], 0(
+; MIPS-ANY-DAG: insert.h [[R2]][1], [[R1]]
+; MIPS-ANY-DAG: st.h [[R2]], 0(
+; MIPS-ANY: .size llvm_mips_insert_h_test
 ;
 @llvm_mips_insert_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
 @llvm_mips_insert_w_ARG3 = global i32 27, align 16
@@ -63,12 +69,12 @@ entry:
 
 declare <4 x i32> @llvm.mips.insert.w(<4 x i32>, i32, i32) nounwind
 
-; CHECK: llvm_mips_insert_w_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
-; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0(
-; CHECK-DAG: insert.w [[R2]][1], [[R1]]
-; CHECK-DAG: st.w [[R2]], 0(
-; CHECK: .size llvm_mips_insert_w_test
+; MIPS-ANY: llvm_mips_insert_w_test:
+; MIPS-ANY-DAG: lw [[R1:\$[0-9]+]], 0(
+; MIPS-ANY-DAG: ld.w [[R2:\$w[0-9]+]], 0(
+; MIPS-ANY-DAG: insert.w [[R2]][1], [[R1]]
+; MIPS-ANY-DAG: st.w [[R2]], 0(
+; MIPS-ANY: .size llvm_mips_insert_w_test
 ;
 @llvm_mips_insert_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
 @llvm_mips_insert_d_ARG3 = global i64 27, align 16
@@ -85,14 +91,18 @@ entry:
 
 declare <2 x i64> @llvm.mips.insert.d(<2 x i64>, i32, i64) nounwind
 
-; CHECK: llvm_mips_insert_d_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
-; CHECK-DAG: lw [[R2:\$[0-9]+]], 4(
-; CHECK-DAG: ld.w [[R3:\$w[0-9]+]],
-; CHECK-DAG: insert.w [[R3]][2], [[R1]]
-; CHECK-DAG: insert.w [[R3]][3], [[R2]]
-; CHECK-DAG: st.w [[R3]],
-; CHECK: .size llvm_mips_insert_d_test
+; MIPS-ANY: llvm_mips_insert_d_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], 0(
+; MIPS32-DAG: lw [[R2:\$[0-9]+]], 4(
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], 0(
+; MIPS32-DAG: ld.w [[R3:\$w[0-9]+]],
+; MIPS64-DAG: ld.d [[W1:\$w[0-9]+]],
+; MIPS32-DAG: insert.w [[R3]][2], [[R1]]
+; MIPS32-DAG: insert.w [[R3]][3], [[R2]]
+; MIPS64-DAG: insert.d [[W1]][1], [[R1]]
+; MIPS32-DAG: st.w [[R3]],
+; MIPS64-DAG: st.d [[W1]],
+; MIPS-ANY: .size llvm_mips_insert_d_test
 ;
 @llvm_mips_insve_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_insve_b_ARG3 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
@@ -109,14 +119,16 @@ entry:
 
 declare <16 x i8> @llvm.mips.insve.b(<16 x i8>, i32, <16 x i8>) nounwind
 
-; CHECK: llvm_mips_insve_b_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_b_ARG1)(
-; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_b_ARG3)(
-; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
-; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
-; CHECK-DAG: insve.b [[R3]][1], [[R4]][0]
-; CHECK-DAG: st.b [[R3]],
-; CHECK: .size llvm_mips_insve_b_test
+; MIPS-ANY: llvm_mips_insve_b_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_b_ARG1)(
+; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_b_ARG3)(
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_insve_b_ARG1)(
+; MIPS64-DAG: ld [[R2:\$[0-9]+]], %got_disp(llvm_mips_insve_b_ARG3)(
+; MIPS-ANY-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; MIPS-ANY-DAG: insve.b [[R3]][1], [[R4]][0]
+; MIPS-ANY-DAG: st.b [[R3]],
+; MIPS-ANY: .size llvm_mips_insve_b_test
 ;
 @llvm_mips_insve_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
 @llvm_mips_insve_h_ARG3 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
@@ -133,14 +145,16 @@ entry:
 
 declare <8 x i16> @llvm.mips.insve.h(<8 x i16>, i32, <8 x i16>) nounwind
 
-; CHECK: llvm_mips_insve_h_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_h_ARG1)(
-; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_h_ARG3)(
-; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]])
-; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]])
-; CHECK-DAG: insve.h [[R3]][1], [[R4]][0]
-; CHECK-DAG: st.h [[R3]],
-; CHECK: .size llvm_mips_insve_h_test
+; MIPS-ANY: llvm_mips_insve_h_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_h_ARG1)(
+; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_h_ARG3)(
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_insve_h_ARG1)(
+; MIPS64-DAG: ld [[R2:\$[0-9]+]], %got_disp(llvm_mips_insve_h_ARG3)(
+; MIPS-ANY-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]])
+; MIPS-ANY-DAG: insve.h [[R3]][1], [[R4]][0]
+; MIPS-ANY-DAG: st.h [[R3]],
+; MIPS-ANY: .size llvm_mips_insve_h_test
 ;
 @llvm_mips_insve_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
 @llvm_mips_insve_w_ARG3 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
@@ -157,14 +171,16 @@ entry:
 
 declare <4 x i32> @llvm.mips.insve.w(<4 x i32>, i32, <4 x i32>) nounwind
 
-; CHECK: llvm_mips_insve_w_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_w_ARG1)(
-; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_w_ARG3)(
-; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]])
-; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]])
-; CHECK-DAG: insve.w [[R3]][1], [[R4]][0]
-; CHECK-DAG: st.w [[R3]],
-; CHECK: .size llvm_mips_insve_w_test
+; MIPS-ANY: llvm_mips_insve_w_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_w_ARG1)(
+; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_w_ARG3)(
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_insve_w_ARG1)(
+; MIPS64-DAG: ld [[R2:\$[0-9]+]], %got_disp(llvm_mips_insve_w_ARG3)(
+; MIPS-ANY-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]])
+; MIPS-ANY-DAG: insve.w [[R3]][1], [[R4]][0]
+; MIPS-ANY-DAG: st.w [[R3]],
+; MIPS-ANY: .size llvm_mips_insve_w_test
 ;
 @llvm_mips_insve_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
 @llvm_mips_insve_d_ARG3 = global <2 x i64> <i64 2, i64 3>, align 16
@@ -181,12 +197,14 @@ entry:
 
 declare <2 x i64> @llvm.mips.insve.d(<2 x i64>, i32, <2 x i64>) nounwind
 
-; CHECK: llvm_mips_insve_d_test:
-; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_d_ARG1)(
-; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_d_ARG3)(
-; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
-; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]])
-; CHECK-DAG: insve.d [[R3]][1], [[R4]][0]
-; CHECK-DAG: st.d [[R3]],
-; CHECK: .size llvm_mips_insve_d_test
+; MIPS-ANY: llvm_mips_insve_d_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_d_ARG1)(
+; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_d_ARG3)(
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_insve_d_ARG1)(
+; MIPS64-DAG: ld [[R2:\$[0-9]+]], %got_disp(llvm_mips_insve_d_ARG3)(
+; MIPS-ANY-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS-ANY-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]])
+; MIPS-ANY-DAG: insve.d [[R3]][1], [[R4]][0]
+; MIPS-ANY-DAG: st.d [[R3]],
+; MIPS-ANY: .size llvm_mips_insve_d_test
 ;
diff --git a/test/CodeGen/Mips/msa/elm_shift_slide.ll b/test/CodeGen/Mips/msa/elm_shift_slide.ll
index 39d670dac8419..00a6544b12070 100644
--- a/test/CodeGen/Mips/msa/elm_shift_slide.ll
+++ b/test/CodeGen/Mips/msa/elm_shift_slide.ll
@@ -5,17 +5,19 @@
 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
 
 @llvm_mips_sldi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sldi_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_sldi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
 
 define void @llvm_mips_sldi_b_test() nounwind {
 entry:
   %0 = load <16 x i8>* @llvm_mips_sldi_b_ARG1
-  %1 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, i32 1)
-  store <16 x i8> %1, <16 x i8>* @llvm_mips_sldi_b_RES
+  %1 = load <16 x i8>* @llvm_mips_sldi_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, <16 x i8> %1, i32 1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_sldi_b_RES
   ret void
 }
 
-declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, i32) nounwind
+declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, <16 x i8>, i32) nounwind
 
 ; CHECK: llvm_mips_sldi_b_test:
 ; CHECK: ld.b
@@ -24,17 +26,19 @@ declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, i32) nounwind
 ; CHECK: .size llvm_mips_sldi_b_test
 ;
 @llvm_mips_sldi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sldi_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
 @llvm_mips_sldi_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
 
 define void @llvm_mips_sldi_h_test() nounwind {
 entry:
   %0 = load <8 x i16>* @llvm_mips_sldi_h_ARG1
-  %1 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, i32 1)
-  store <8 x i16> %1, <8 x i16>* @llvm_mips_sldi_h_RES
+  %1 = load <8 x i16>* @llvm_mips_sldi_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, <8 x i16> %1, i32 1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_sldi_h_RES
   ret void
 }
 
-declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, i32) nounwind
+declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, <8 x i16>, i32) nounwind
 
 ; CHECK: llvm_mips_sldi_h_test:
 ; CHECK: ld.h
@@ -43,17 +47,19 @@ declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, i32) nounwind
 ; CHECK: .size llvm_mips_sldi_h_test
 ;
 @llvm_mips_sldi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sldi_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
 @llvm_mips_sldi_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
 
 define void @llvm_mips_sldi_w_test() nounwind {
 entry:
   %0 = load <4 x i32>* @llvm_mips_sldi_w_ARG1
-  %1 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, i32 1)
-  store <4 x i32> %1, <4 x i32>* @llvm_mips_sldi_w_RES
+  %1 = load <4 x i32>* @llvm_mips_sldi_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, <4 x i32> %1, i32 1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_sldi_w_RES
   ret void
 }
 
-declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, i32) nounwind
+declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, <4 x i32>, i32) nounwind
 
 ; CHECK: llvm_mips_sldi_w_test:
 ; CHECK: ld.w
@@ -62,17 +68,19 @@ declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, i32) nounwind
 ; CHECK: .size llvm_mips_sldi_w_test
 ;
 @llvm_mips_sldi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sldi_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16
 @llvm_mips_sldi_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
 
 define void @llvm_mips_sldi_d_test() nounwind {
 entry:
   %0 = load <2 x i64>* @llvm_mips_sldi_d_ARG1
-  %1 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, i32 1)
-  store <2 x i64> %1, <2 x i64>* @llvm_mips_sldi_d_RES
+  %1 = load <2 x i64>* @llvm_mips_sldi_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, <2 x i64> %1, i32 1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_sldi_d_RES
   ret void
 }
 
-declare <2 x i64> @llvm.mips.sldi.d(<2 x i64>, i32) nounwind
+declare <2 x i64> @llvm.mips.sldi.d(<2 x i64>, <2 x i64>, i32) nounwind
 
 ; CHECK: llvm_mips_sldi_d_test:
 ; CHECK: ld.d
diff --git a/test/CodeGen/Mips/msa/frameindex.ll b/test/CodeGen/Mips/msa/frameindex.ll
index 3088e1ba9893b..07e67bf042875 100644
--- a/test/CodeGen/Mips/msa/frameindex.ll
+++ b/test/CodeGen/Mips/msa/frameindex.ll
@@ -83,3 +83,312 @@ define void @loadstore_v16i8_just_over_simm16() nounwind {
   ret void
   ; MIPS32-AE: .size loadstore_v16i8_just_over_simm16
 }
+
+define void @loadstore_v8i16_near() nounwind {
+  ; MIPS32-AE: loadstore_v8i16_near:
+
+  %1 = alloca <8 x i16>
+  %2 = load volatile <8 x i16>* %1
+  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0($sp)
+  store volatile <8 x i16> %2, <8 x i16>* %1
+  ; MIPS32-AE: st.h [[R1]], 0($sp)
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v8i16_near
+}
+
+define void @loadstore_v8i16_unaligned() nounwind {
+  ; MIPS32-AE: loadstore_v8i16_unaligned:
+
+  %1 = alloca [2 x <8 x i16>]
+  %2 = bitcast [2 x <8 x i16>]* %1 to i8*
+  %3 = getelementptr i8* %2, i32 1
+  %4 = bitcast i8* %3 to [2 x <8 x i16>]*
+  %5 = getelementptr [2 x <8 x i16>]* %4, i32 0, i32 0
+
+  %6 = load volatile <8 x i16>* %5
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <8 x i16> %6, <8 x i16>* %5
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: st.h [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v8i16_unaligned
+}
+
+define void @loadstore_v8i16_just_under_simm10() nounwind {
+  ; MIPS32-AE: loadstore_v8i16_just_under_simm10:
+
+  %1 = alloca <8 x i16>
+  %2 = alloca [1008 x i8] ; Push the frame right up to 1024 bytes
+
+  %3 = load volatile <8 x i16>* %1
+  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 1008($sp)
+  store volatile <8 x i16> %3, <8 x i16>* %1
+  ; MIPS32-AE: st.h [[R1]], 1008($sp)
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v8i16_just_under_simm10
+}
+
+define void @loadstore_v8i16_just_over_simm10() nounwind {
+  ; MIPS32-AE: loadstore_v8i16_just_over_simm10:
+
+  %1 = alloca <8 x i16>
+  %2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes
+
+  %3 = load volatile <8 x i16>* %1
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
+  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <8 x i16> %3, <8 x i16>* %1
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
+  ; MIPS32-AE: st.h [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v8i16_just_over_simm10
+}
+
+define void @loadstore_v8i16_just_under_simm16() nounwind {
+  ; MIPS32-AE: loadstore_v8i16_just_under_simm16:
+
+  %1 = alloca <8 x i16>
+  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+
+  %3 = load volatile <8 x i16>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <8 x i16> %3, <8 x i16>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: st.h [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v8i16_just_under_simm16
+}
+
+define void @loadstore_v8i16_just_over_simm16() nounwind {
+  ; MIPS32-AE: loadstore_v8i16_just_over_simm16:
+
+  %1 = alloca <8 x i16>
+  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+
+  %3 = load volatile <8 x i16>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <8 x i16> %3, <8 x i16>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: st.h [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v8i16_just_over_simm16
+}
+
+define void @loadstore_v4i32_near() nounwind {
+  ; MIPS32-AE: loadstore_v4i32_near:
+
+  %1 = alloca <4 x i32>
+  %2 = load volatile <4 x i32>* %1
+  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0($sp)
+  store volatile <4 x i32> %2, <4 x i32>* %1
+  ; MIPS32-AE: st.w [[R1]], 0($sp)
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v4i32_near
+}
+
+define void @loadstore_v4i32_unaligned() nounwind {
+  ; MIPS32-AE: loadstore_v4i32_unaligned:
+
+  %1 = alloca [2 x <4 x i32>]
+  %2 = bitcast [2 x <4 x i32>]* %1 to i8*
+  %3 = getelementptr i8* %2, i32 1
+  %4 = bitcast i8* %3 to [2 x <4 x i32>]*
+  %5 = getelementptr [2 x <4 x i32>]* %4, i32 0, i32 0
+
+  %6 = load volatile <4 x i32>* %5
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <4 x i32> %6, <4 x i32>* %5
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: st.w [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v4i32_unaligned
+}
+
+define void @loadstore_v4i32_just_under_simm10() nounwind {
+  ; MIPS32-AE: loadstore_v4i32_just_under_simm10:
+
+  %1 = alloca <4 x i32>
+  %2 = alloca [2032 x i8] ; Push the frame right up to 2048 bytes
+
+  %3 = load volatile <4 x i32>* %1
+  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 2032($sp)
+  store volatile <4 x i32> %3, <4 x i32>* %1
+  ; MIPS32-AE: st.w [[R1]], 2032($sp)
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v4i32_just_under_simm10
+}
+
+define void @loadstore_v4i32_just_over_simm10() nounwind {
+  ; MIPS32-AE: loadstore_v4i32_just_over_simm10:
+
+  %1 = alloca <4 x i32>
+  %2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes
+
+  %3 = load volatile <4 x i32>* %1
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
+  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <4 x i32> %3, <4 x i32>* %1
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
+  ; MIPS32-AE: st.w [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v4i32_just_over_simm10
+}
+
+define void @loadstore_v4i32_just_under_simm16() nounwind {
+  ; MIPS32-AE: loadstore_v4i32_just_under_simm16:
+
+  %1 = alloca <4 x i32>
+  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+
+  %3 = load volatile <4 x i32>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <4 x i32> %3, <4 x i32>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: st.w [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v4i32_just_under_simm16
+}
+
+define void @loadstore_v4i32_just_over_simm16() nounwind {
+  ; MIPS32-AE: loadstore_v4i32_just_over_simm16:
+
+  %1 = alloca <4 x i32>
+  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+
+  %3 = load volatile <4 x i32>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <4 x i32> %3, <4 x i32>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: st.w [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v4i32_just_over_simm16
+}
+
+define void @loadstore_v2i64_near() nounwind {
+  ; MIPS32-AE: loadstore_v2i64_near:
+
+  %1 = alloca <2 x i64>
+  %2 = load volatile <2 x i64>* %1
+  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0($sp)
+  store volatile <2 x i64> %2, <2 x i64>* %1
+  ; MIPS32-AE: st.d [[R1]], 0($sp)
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v2i64_near
+}
+
+define void @loadstore_v2i64_unaligned() nounwind {
+  ; MIPS32-AE: loadstore_v2i64_unaligned:
+
+  %1 = alloca [2 x <2 x i64>]
+  %2 = bitcast [2 x <2 x i64>]* %1 to i8*
+  %3 = getelementptr i8* %2, i32 1
+  %4 = bitcast i8* %3 to [2 x <2 x i64>]*
+  %5 = getelementptr [2 x <2 x i64>]* %4, i32 0, i32 0
+
+  %6 = load volatile <2 x i64>* %5
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <2 x i64> %6, <2 x i64>* %5
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: st.d [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v2i64_unaligned
+}
+
+define void @loadstore_v2i64_just_under_simm10() nounwind {
+  ; MIPS32-AE: loadstore_v2i64_just_under_simm10:
+
+  %1 = alloca <2 x i64>
+  %2 = alloca [4080 x i8] ; Push the frame right up to 4096 bytes
+
+  %3 = load volatile <2 x i64>* %1
+  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 4080($sp)
+  store volatile <2 x i64> %3, <2 x i64>* %1
+  ; MIPS32-AE: st.d [[R1]], 4080($sp)
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v2i64_just_under_simm10
+}
+
+define void @loadstore_v2i64_just_over_simm10() nounwind {
+  ; MIPS32-AE: loadstore_v2i64_just_over_simm10:
+
+  %1 = alloca <2 x i64>
+  %2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes
+
+  %3 = load volatile <2 x i64>* %1
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
+  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <2 x i64> %3, <2 x i64>* %1
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
+  ; MIPS32-AE: st.d [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v2i64_just_over_simm10
+}
+
+define void @loadstore_v2i64_just_under_simm16() nounwind {
+  ; MIPS32-AE: loadstore_v2i64_just_under_simm16:
+
+  %1 = alloca <2 x i64>
+  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+
+  %3 = load volatile <2 x i64>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <2 x i64> %3, <2 x i64>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: st.d [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v2i64_just_under_simm16
+}
+
+define void @loadstore_v2i64_just_over_simm16() nounwind {
+  ; MIPS32-AE: loadstore_v2i64_just_over_simm16:
+
+  %1 = alloca <2 x i64>
+  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+
+  %3 = load volatile <2 x i64>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <2 x i64> %3, <2 x i64>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: st.d [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v2i64_just_over_simm16
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll b/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll
index 24e27cbf14b83..f25ab22806026 100644
--- a/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll
+++ b/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll
@@ -10,7 +10,7 @@
 ; The legalizer legalized ; the <4 x i8>'s into <4 x i32>'s, then a call to
 ; isVSplat() returned the splat value for <i8 -1, i8 -1, ...> as a 32-bit APInt
 ; (255), but the zeroinitializer splat value as an 8-bit APInt (0). The
-; assertion occured when trying to check the values were bitwise inverses of
+; assertion occurred when trying to check the values were bitwise inverses of
 ; each-other.
 ;
 ; It should at least successfully build.
diff --git a/test/CodeGen/Mips/msa/shift-dagcombine.ll b/test/CodeGen/Mips/msa/shift-dagcombine.ll
index 0d809fb4fbf17..322acff3ff49a 100644
--- a/test/CodeGen/Mips/msa/shift-dagcombine.ll
+++ b/test/CodeGen/Mips/msa/shift-dagcombine.ll
@@ -37,7 +37,8 @@ define void @lshr_v4i32(<4 x i32>* %c) nounwind {
   %2 = lshr <4 x i32> <i32 -2, i32 -4, i32 -8, i32 -16>,
                       <i32 0, i32 1, i32 2, i32 3>
   ; CHECK-NOT: srl
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], %lo
+  ; CHECK-DAG: addiu [[CPOOL:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0([[CPOOL]])
   ; CHECK-NOT: srl
   store volatile <4 x i32> %2, <4 x i32>* %c
   ; CHECK-DAG: st.w [[R1]], 0($4)
diff --git a/test/CodeGen/Mips/msa/shuffle.ll b/test/CodeGen/Mips/msa/shuffle.ll
index 316c669c3ac68..faeec5d58dd49 100644
--- a/test/CodeGen/Mips/msa/shuffle.ll
+++ b/test/CodeGen/Mips/msa/shuffle.ll
@@ -7,7 +7,8 @@ define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
   %1 = load <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
   ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
   store <16 x i8> %2, <16 x i8>* %c
   ; CHECK-DAG: st.b [[R3]], 0($4)
@@ -37,7 +38,8 @@ define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
   %2 = load <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
-  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
   ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
   store <16 x i8> %3, <16 x i8>* %c
   ; CHECK-DAG: st.b [[R3]], 0($4)
@@ -54,8 +56,11 @@ define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
   %2 = load <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
-  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
-  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
+  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
+  ; the operands to get the right answer.
+  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R1]]
   store <16 x i8> %3, <16 x i8>* %c
   ; CHECK-DAG: st.b [[R3]], 0($4)
 
@@ -83,7 +88,8 @@ define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
   %1 = load <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
   ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
   store <8 x i16> %2, <8 x i16>* %c
   ; CHECK-DAG: st.h [[R3]], 0($4)
@@ -113,7 +119,8 @@ define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
   %2 = load <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
-  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
   ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
   store <8 x i16> %3, <8 x i16>* %c
   ; CHECK-DAG: st.h [[R3]], 0($4)
@@ -130,8 +137,11 @@ define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
   %2 = load <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
-  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
-  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
+  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
+  ; the operands to get the right answer.
+  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R1]]
   store <8 x i16> %3, <8 x i16>* %c
   ; CHECK-DAG: st.h [[R3]], 0($4)
 
@@ -207,8 +217,11 @@ define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
   %2 = load <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
-  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
-  ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[PTR_A]])
+  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
+  ; the operands to get the right answer.
+  ; CHECK-DAG: vshf.w [[R3]], [[R2]], [[R1]]
   store <4 x i32> %3, <4 x i32>* %c
   ; CHECK-DAG: st.w [[R3]], 0($4)
 
@@ -236,7 +249,8 @@ define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
   %1 = load <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
   ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
   store <2 x i64> %2, <2 x i64>* %c
   ; CHECK-DAG: st.d [[R3]], 0($4)
@@ -266,7 +280,8 @@ define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
   %2 = load <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
-  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
   ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
   store <2 x i64> %3, <2 x i64>* %c
   ; CHECK-DAG: st.d [[R3]], 0($4)
@@ -283,8 +298,11 @@ define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
   %2 = load <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
-  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
-  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
+  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
+  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
+  ; the operands to get the right answer.
+  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R1]]
   store <2 x i64> %3, <2 x i64>* %c
   ; CHECK-DAG: st.d [[R3]], 0($4)
 
diff --git a/test/CodeGen/Mips/msa/special.ll b/test/CodeGen/Mips/msa/special.ll
index 60a4369dfb1c5..b9badf5dc5822 100644
--- a/test/CodeGen/Mips/msa/special.ll
+++ b/test/CodeGen/Mips/msa/special.ll
@@ -1,6 +1,13 @@
 ; Test the MSA intrinsics that are encoded with the SPECIAL instruction format.
 
-; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s --check-prefix=MIPS32
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s --check-prefix=MIPS64
+; RUN: llc -march=mips -mcpu=mips32r6 -mattr=+msa < %s | \
+; RUN:   FileCheck %s --check-prefix=MIPS32
+; RUN: llc -march=mips64 -mcpu=mips64r6 -mattr=+msa < %s | \
+; RUN:   FileCheck %s --check-prefix=MIPS64
 
 define i32 @llvm_mips_lsa_test(i32 %a, i32 %b) nounwind {
 entry:
@@ -10,9 +17,9 @@ entry:
 
 declare i32 @llvm.mips.lsa(i32, i32, i32) nounwind
 
-; CHECK: llvm_mips_lsa_test:
-; CHECK: lsa {{\$[0-9]+}}, {{\$[0-9]+}}, {{\$[0-9]+}}, 2
-; CHECK: .size llvm_mips_lsa_test
+; MIPS32: llvm_mips_lsa_test:
+; MIPS32: lsa {{\$[0-9]+}}, $5, $4, 2
+; MIPS32: .size llvm_mips_lsa_test
 
 define i32 @lsa_test(i32 %a, i32 %b) nounwind {
 entry:
@@ -21,6 +28,29 @@ entry:
   ret i32 %1
 }
 
-; CHECK: lsa_test:
-; CHECK: lsa {{\$[0-9]+}}, {{\$[0-9]+}}, {{\$[0-9]+}}, 2
-; CHECK: .size lsa_test
+; MIPS32: lsa_test:
+; MIPS32: lsa {{\$[0-9]+}}, $5, $4, 2
+; MIPS32: .size lsa_test
+
+define i64 @llvm_mips_dlsa_test(i64 %a, i64 %b) nounwind {
+entry:
+  %0 = tail call i64 @llvm.mips.dlsa(i64 %a, i64 %b, i32 2)
+  ret i64 %0
+}
+
+declare i64 @llvm.mips.dlsa(i64, i64, i32) nounwind
+
+; MIPS64: llvm_mips_dlsa_test:
+; MIPS64: dlsa {{\$[0-9]+}}, $5, $4, 2
+; MIPS64: .size llvm_mips_dlsa_test
+
+define i64 @dlsa_test(i64 %a, i64 %b) nounwind {
+entry:
+  %0 = shl i64 %b, 2
+  %1 = add i64 %a, %0
+  ret i64 %1
+}
+
+; MIPS64: dlsa_test:
+; MIPS64: dlsa {{\$[0-9]+}}, $5, $4, 2
+; MIPS64: .size dlsa_test
diff --git a/test/CodeGen/Mips/msa/vec.ll b/test/CodeGen/Mips/msa/vec.ll
index 5bddf5aea4059..d5b97f52fb834 100644
--- a/test/CodeGen/Mips/msa/vec.ll
+++ b/test/CodeGen/Mips/msa/vec.ll
@@ -104,12 +104,12 @@ entry:
   ret void
 }
 
-; CHECK: and_v_b_test:
-; CHECK: ld.b
-; CHECK: ld.b
-; CHECK: and.v
-; CHECK: st.b
-; CHECK: .size and_v_b_test
+; ANYENDIAN: and_v_b_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: and.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size and_v_b_test
 ;
 define void @and_v_h_test() nounwind {
 entry:
@@ -120,12 +120,12 @@ entry:
   ret void
 }
 
-; CHECK: and_v_h_test:
-; CHECK: ld.h
-; CHECK: ld.h
-; CHECK: and.v
-; CHECK: st.h
-; CHECK: .size and_v_h_test
+; ANYENDIAN: and_v_h_test:
+; ANYENDIAN: ld.h
+; ANYENDIAN: ld.h
+; ANYENDIAN: and.v
+; ANYENDIAN: st.h
+; ANYENDIAN: .size and_v_h_test
 ;
 
 define void @and_v_w_test() nounwind {
@@ -137,12 +137,12 @@ entry:
   ret void
 }
 
-; CHECK: and_v_w_test:
-; CHECK: ld.w
-; CHECK: ld.w
-; CHECK: and.v
-; CHECK: st.w
-; CHECK: .size and_v_w_test
+; ANYENDIAN: and_v_w_test:
+; ANYENDIAN: ld.w
+; ANYENDIAN: ld.w
+; ANYENDIAN: and.v
+; ANYENDIAN: st.w
+; ANYENDIAN: .size and_v_w_test
 ;
 
 define void @and_v_d_test() nounwind {
@@ -154,12 +154,12 @@ entry:
   ret void
 }
 
-; CHECK: and_v_d_test:
-; CHECK: ld.d
-; CHECK: ld.d
-; CHECK: and.v
-; CHECK: st.d
-; CHECK: .size and_v_d_test
+; ANYENDIAN: and_v_d_test:
+; ANYENDIAN: ld.d
+; ANYENDIAN: ld.d
+; ANYENDIAN: and.v
+; ANYENDIAN: st.d
+; ANYENDIAN: .size and_v_d_test
 ;
 @llvm_mips_bmnz_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_bmnz_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
@@ -431,9 +431,9 @@ entry:
 ; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
 ; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
 ; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
-; bmnz.v is the same as bsel.v with wt and wd_in swapped
-; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
-; ANYENDIAN-DAG: st.b [[R6]], 0(
+; bmnz.v is the same as bsel.v with (wd_in, wt, ws) -> (wt, ws, wd_in)
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R6]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
 ; ANYENDIAN: .size llvm_mips_bsel_v_b_test
 
 @llvm_mips_bsel_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@@ -462,9 +462,9 @@ entry:
 ; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
 ; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
 ; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
-; bmnz.v is the same as bsel.v with wt and wd_in swapped
-; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
-; ANYENDIAN-DAG: st.b [[R6]], 0(
+; bmnz.v is the same as bsel.v with (wd_in, wt, ws) -> (wt, ws, wd_in)
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R6]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
 ; ANYENDIAN: .size llvm_mips_bsel_v_h_test
 
 @llvm_mips_bsel_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@@ -493,9 +493,9 @@ entry:
 ; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
 ; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
 ; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
-; bmnz.v is the same as bsel.v with wt and wd_in swapped
-; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
-; ANYENDIAN-DAG: st.b [[R6]], 0(
+; bmnz.v is the same as bsel.v with (wd_in, wt, ws) -> (wt, ws, wd_in)
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R6]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
 ; ANYENDIAN: .size llvm_mips_bsel_v_w_test
 
 @llvm_mips_bsel_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
@@ -524,9 +524,9 @@ entry:
 ; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
 ; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
 ; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
-; bmnz.v is the same as bsel.v with wt and wd_in swapped
-; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
-; ANYENDIAN-DAG: st.b [[R6]], 0(
+; bmnz.v is the same as bsel.v with (wd_in, wt, ws) -> (wt, ws, wd_in)
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R6]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
 ; ANYENDIAN: .size llvm_mips_bsel_v_d_test
 
 @llvm_mips_nor_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@@ -722,12 +722,12 @@ entry:
   ret void
 }
 
-; CHECK: or_v_b_test:
-; CHECK: ld.b
-; CHECK: ld.b
-; CHECK: or.v
-; CHECK: st.b
-; CHECK: .size or_v_b_test
+; ANYENDIAN: or_v_b_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: or.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size or_v_b_test
 ;
 define void @or_v_h_test() nounwind {
 entry:
@@ -738,12 +738,12 @@ entry:
   ret void
 }
 
-; CHECK: or_v_h_test:
-; CHECK: ld.h
-; CHECK: ld.h
-; CHECK: or.v
-; CHECK: st.h
-; CHECK: .size or_v_h_test
+; ANYENDIAN: or_v_h_test:
+; ANYENDIAN: ld.h
+; ANYENDIAN: ld.h
+; ANYENDIAN: or.v
+; ANYENDIAN: st.h
+; ANYENDIAN: .size or_v_h_test
 ;
 
 define void @or_v_w_test() nounwind {
@@ -755,12 +755,12 @@ entry:
   ret void
 }
 
-; CHECK: or_v_w_test:
-; CHECK: ld.w
-; CHECK: ld.w
-; CHECK: or.v
-; CHECK: st.w
-; CHECK: .size or_v_w_test
+; ANYENDIAN: or_v_w_test:
+; ANYENDIAN: ld.w
+; ANYENDIAN: ld.w
+; ANYENDIAN: or.v
+; ANYENDIAN: st.w
+; ANYENDIAN: .size or_v_w_test
 ;
 
 define void @or_v_d_test() nounwind {
@@ -772,12 +772,12 @@ entry:
   ret void
 }
 
-; CHECK: or_v_d_test:
-; CHECK: ld.d
-; CHECK: ld.d
-; CHECK: or.v
-; CHECK: st.d
-; CHECK: .size or_v_d_test
+; ANYENDIAN: or_v_d_test:
+; ANYENDIAN: ld.d
+; ANYENDIAN: ld.d
+; ANYENDIAN: or.v
+; ANYENDIAN: st.d
+; ANYENDIAN: .size or_v_d_test
 ;
 @llvm_mips_xor_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_xor_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
@@ -880,12 +880,12 @@ entry:
   ret void
 }
 
-; CHECK: xor_v_b_test:
-; CHECK: ld.b
-; CHECK: ld.b
-; CHECK: xor.v
-; CHECK: st.b
-; CHECK: .size xor_v_b_test
+; ANYENDIAN: xor_v_b_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: xor.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size xor_v_b_test
 ;
 define void @xor_v_h_test() nounwind {
 entry:
@@ -896,12 +896,12 @@ entry:
   ret void
 }
 
-; CHECK: xor_v_h_test:
-; CHECK: ld.h
-; CHECK: ld.h
-; CHECK: xor.v
-; CHECK: st.h
-; CHECK: .size xor_v_h_test
+; ANYENDIAN: xor_v_h_test:
+; ANYENDIAN: ld.h
+; ANYENDIAN: ld.h
+; ANYENDIAN: xor.v
+; ANYENDIAN: st.h
+; ANYENDIAN: .size xor_v_h_test
 ;
 
 define void @xor_v_w_test() nounwind {
@@ -913,12 +913,12 @@ entry:
   ret void
 }
 
-; CHECK: xor_v_w_test:
-; CHECK: ld.w
-; CHECK: ld.w
-; CHECK: xor.v
-; CHECK: st.w
-; CHECK: .size xor_v_w_test
+; ANYENDIAN: xor_v_w_test:
+; ANYENDIAN: ld.w
+; ANYENDIAN: ld.w
+; ANYENDIAN: xor.v
+; ANYENDIAN: st.w
+; ANYENDIAN: .size xor_v_w_test
 ;
 
 define void @xor_v_d_test() nounwind {
@@ -930,12 +930,12 @@ entry:
   ret void
 }
 
-; CHECK: xor_v_d_test:
-; CHECK: ld.d
-; CHECK: ld.d
-; CHECK: xor.v
-; CHECK: st.d
-; CHECK: .size xor_v_d_test
+; ANYENDIAN: xor_v_d_test:
+; ANYENDIAN: ld.d
+; ANYENDIAN: ld.d
+; ANYENDIAN: xor.v
+; ANYENDIAN: st.d
+; ANYENDIAN: .size xor_v_d_test
 ;
 declare <16 x i8> @llvm.mips.and.v(<16 x i8>, <16 x i8>) nounwind
 declare <16 x i8> @llvm.mips.bmnz.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
diff --git a/test/CodeGen/Mips/nacl-align.ll b/test/CodeGen/Mips/nacl-align.ll
new file mode 100644
index 0000000000000..e61b8347760e4
--- /dev/null
+++ b/test/CodeGen/Mips/nacl-align.ll
@@ -0,0 +1,96 @@
+; RUN: llc -filetype=asm -mtriple=mipsel-none-nacl -relocation-model=static \
+; RUN:     -O3 < %s | FileCheck %s
+
+
+; This test tests that NaCl functions are bundle-aligned.
+
+define void @test0() {
+  ret void
+
+; CHECK:          .align  4
+; CHECK-NOT:      .align
+; CHECK-LABEL:    test0:
+
+}
+
+
+; This test tests that blocks that are jumped to through jump table are
+; bundle-aligned.
+
+define i32 @test1(i32 %i) {
+entry:
+  switch i32 %i, label %default [
+    i32 0, label %bb1
+    i32 1, label %bb2
+    i32 2, label %bb3
+    i32 3, label %bb4
+  ]
+
+bb1:
+  ret i32 111
+bb2:
+  ret i32 222
+bb3:
+  ret i32 333
+bb4:
+  ret i32 444
+default:
+  ret i32 555
+
+
+; CHECK-LABEL:       test1:
+
+; CHECK:             .align  4
+; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
+; CHECK-NEXT:        jr      $ra
+; CHECK-NEXT:        addiu   $2, $zero, 111
+; CHECK-NEXT:        .align  4
+; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
+; CHECK-NEXT:        jr      $ra
+; CHECK-NEXT:        addiu   $2, $zero, 222
+; CHECK-NEXT:        .align  4
+; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
+; CHECK-NEXT:        jr      $ra
+; CHECK-NEXT:        addiu   $2, $zero, 333
+; CHECK-NEXT:        .align  4
+; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
+; CHECK-NEXT:        jr      $ra
+; CHECK-NEXT:        addiu   $2, $zero, 444
+
+}
+
+
+; This test tests that a block whose address is taken is bundle-aligned in NaCl.
+
+@bb_array = constant [2 x i8*] [i8* blockaddress(@test2, %bb1),
+                                i8* blockaddress(@test2, %bb2)], align 4
+
+define i32 @test2(i32 %i) {
+entry:
+  %elementptr = getelementptr inbounds [2 x i8*]* @bb_array, i32 0, i32 %i
+  %0 = load i8** %elementptr, align 4
+  indirectbr i8* %0, [label %bb1, label %bb2]
+
+bb1:
+  ret i32 111
+bb2:
+  ret i32 222
+
+
+; CHECK-LABEL:       test2:
+
+; Note that there are two consecutive labels - one temporary and one for
+; basic block.
+
+; CHECK:             .align  4
+; CHECK-NEXT:    ${{[a-zA-Z0-9]+}}:
+; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
+; CHECK-NEXT:        jr      $ra
+; CHECK-NEXT:        addiu   $2, $zero, 111
+; CHECK-NEXT:        .align  4
+; CHECK-NEXT:    ${{[a-zA-Z0-9]+}}:
+; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
+; CHECK-NEXT:        jr      $ra
+; CHECK-NEXT:        addiu   $2, $zero, 222
+
+}
diff --git a/test/CodeGen/Mips/nacl-branch-delay.ll b/test/CodeGen/Mips/nacl-branch-delay.ll
new file mode 100644
index 0000000000000..d251eee07526f
--- /dev/null
+++ b/test/CodeGen/Mips/nacl-branch-delay.ll
@@ -0,0 +1,71 @@
+; RUN: llc -filetype=asm -mtriple=mipsel-none-linux -relocation-model=static \
+; RUN:     -O3 < %s | FileCheck %s
+
+; RUN: llc -filetype=asm -mtriple=mipsel-none-nacl -relocation-model=static \
+; RUN:     -O3 < %s | FileCheck %s -check-prefix=CHECK-NACL
+
+@x = global i32 0, align 4
+declare void @f1(i32)
+declare void @f2()
+
+
+define void @test1() {
+  %1 = load i32* @x, align 4
+  call void @f1(i32 %1)
+  ret void
+
+
+; CHECK-LABEL:       test1
+
+; We first make sure that for non-NaCl targets branch-delay slot contains
+; dangerous instructions.
+
+; Check that branch-delay slot is used to load argument from x before function
+; call.
+
+; CHECK:             jal
+; CHECK-NEXT:        lw      $4, %lo(x)(${{[0-9]+}})
+
+; Check that branch-delay slot is used for adjusting sp before return.
+
+; CHECK:             jr      $ra
+; CHECK-NEXT:        addiu   $sp, $sp, {{[0-9]+}}
+
+
+; For NaCl, check that branch-delay slot doesn't contain dangerous instructions.
+
+; CHECK-NACL:             jal
+; CHECK-NACL-NEXT:        nop
+
+; CHECK-NACL:             jr      $ra
+; CHECK-NACL-NEXT:        nop
+}
+
+
+define void @test2() {
+  store i32 1, i32* @x, align 4
+  tail call void @f2()
+  ret void
+
+
+; CHECK-LABEL:       test2
+
+; Check that branch-delay slot is used for storing to x before function call.
+
+; CHECK:             jal
+; CHECK-NEXT:        sw      ${{[0-9]+}}, %lo(x)(${{[0-9]+}})
+
+; Check that branch-delay slot is used for adjusting sp before return.
+
+; CHECK:             jr      $ra
+; CHECK-NEXT:        addiu   $sp, $sp, {{[0-9]+}}
+
+
+; For NaCl, check that branch-delay slot doesn't contain dangerous instructions.
+
+; CHECK-NACL:             jal
+; CHECK-NACL-NEXT:        nop
+
+; CHECK-NACL:             jr      $ra
+; CHECK-NACL-NEXT:        nop
+}
diff --git a/test/CodeGen/Mips/nacl-reserved-regs.ll b/test/CodeGen/Mips/nacl-reserved-regs.ll
new file mode 100644
index 0000000000000..ae21283b1fb7e
--- /dev/null
+++ b/test/CodeGen/Mips/nacl-reserved-regs.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=mipsel -O3 < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-none-nacl-gnu -O3 < %s \
+; RUN:  | FileCheck %s -check-prefix=CHECK-NACL
+
+@var = external global i32
+
+define void @f() {
+  %val1 = load volatile i32* @var
+  %val2 = load volatile i32* @var
+  %val3 = load volatile i32* @var
+  %val4 = load volatile i32* @var
+  %val5 = load volatile i32* @var
+  %val6 = load volatile i32* @var
+  %val7 = load volatile i32* @var
+  %val8 = load volatile i32* @var
+  %val9 = load volatile i32* @var
+  %val10 = load volatile i32* @var
+  %val11 = load volatile i32* @var
+  %val12 = load volatile i32* @var
+  %val13 = load volatile i32* @var
+  %val14 = load volatile i32* @var
+  %val15 = load volatile i32* @var
+  %val16 = load volatile i32* @var
+  store volatile i32 %val1, i32* @var
+  store volatile i32 %val2, i32* @var
+  store volatile i32 %val3, i32* @var
+  store volatile i32 %val4, i32* @var
+  store volatile i32 %val5, i32* @var
+  store volatile i32 %val6, i32* @var
+  store volatile i32 %val7, i32* @var
+  store volatile i32 %val8, i32* @var
+  store volatile i32 %val9, i32* @var
+  store volatile i32 %val10, i32* @var
+  store volatile i32 %val11, i32* @var
+  store volatile i32 %val12, i32* @var
+  store volatile i32 %val13, i32* @var
+  store volatile i32 %val14, i32* @var
+  store volatile i32 %val15, i32* @var
+  store volatile i32 %val16, i32* @var
+  ret void
+
+; Check that t6, t7 and t8 are used in non-NaCl code.
+; CHECK:    lw  $14
+; CHECK:    lw  $15
+; CHECK:    lw  $24
+
+; t6, t7 and t8 are reserved in NaCl.
+; CHECK-NACL-NOT:    lw  $14
+; CHECK-NACL-NOT:    lw  $15
+; CHECK-NACL-NOT:    lw  $24
+}
diff --git a/test/CodeGen/Mips/no-odd-spreg.ll b/test/CodeGen/Mips/no-odd-spreg.ll
new file mode 100644
index 0000000000000..572e940bc4675
--- /dev/null
+++ b/test/CodeGen/Mips/no-odd-spreg.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG -check-prefix=ODDSPREG-NO-EMIT
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG
+; RUN: llc -march=mipsel -mcpu=mips32r6 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG -check-prefix=ODDSPREG-NO-EMIT
+; RUN: llc -march=mipsel -mcpu=mips32r6 -mattr=fp64,+nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG
+; RUN: llc -march=mipsel -mcpu=mips32r6 -mattr=fpxx,-nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG -check-prefix=ODDSPREG-EMIT
+
+; We don't emit a directive unless we need to. This is to support versions of
+; GAS which do not support the directive.
+; ODDSPREG-EMIT:        .module oddspreg
+; ODDSPREG-NO-EMIT-NOT: .module oddspreg
+; NOODDSPREG:           .module nooddspreg
+
+define float @two_floats(float %a) {
+entry:
+  ; Clobber all except $f12 and $f13
+  ;
+  ; The intention is that if odd single precision registers are permitted, the
+  ; allocator will choose $f12 and $f13 to avoid the spill/reload.
+  ;
+  ; On the other hand, if odd single precision registers are not permitted, it
+  ; will be forced to spill/reload either %a or %0.
+
+  %0 = fadd float %a, 1.0
+  call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+  %1 = fadd float %a, %0
+  ret float %1
+}
+
+; ALL-LABEL:  two_floats:
+; ODDSPREG:       add.s $f13, $f12, ${{f[0-9]+}}
+; ODDSPREG-NOT:   swc1
+; ODDSPREG-NOT:   lwc1
+; ODDSPREG:       add.s $f0, $f12, $f13
+
+; NOODDSPREG:     add.s $[[T0:f[0-9]*[02468]]], $f12, ${{f[0-9]+}}
+; NOODDSPREG:     swc1 $[[T0]],
+; NOODDSPREG:     lwc1 $[[T1:f[0-9]*[02468]]],
+; NOODDSPREG:     add.s $f0, $f12, $[[T1]]
+
+define double @two_doubles(double %a) {
+entry:
+  ; Clobber all except $f12 and $f13
+  ;
+  ; -mno-odd-sp-reg doesn't need to affect double precision values so both cases
+  ; use $f12 and $f13.
+
+  %0 = fadd double %a, 1.0
+  call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+  %1 = fadd double %a, %0
+  ret double %1
+}
+
+; ALL-LABEL: two_doubles:
+; ALL:           add.d $[[T0:f[0-9]+]], $f12, ${{f[0-9]+}}
+; ALL:           add.d $f0, $f12, $[[T0]]
+
+
+; INVALID: -mattr=+nooddspreg is not currently permitted for a 32-bit FPU register file (FR=0 mode).
diff --git a/test/CodeGen/Mips/nomips16.ll b/test/CodeGen/Mips/nomips16.ll
index bf7c667d057f6..0affb16ac7c2a 100644
--- a/test/CodeGen/Mips/nomips16.ll
+++ b/test/CodeGen/Mips/nomips16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s 
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s
 
 @x = global float 0.000000e+00, align 4
 @.str = private unnamed_addr constant [20 x i8] c"in main: mips16 %f\0A\00", align 1
diff --git a/test/CodeGen/Mips/null-streamer.ll b/test/CodeGen/Mips/null-streamer.ll
new file mode 100644
index 0000000000000..56cebbfafb7c9
--- /dev/null
+++ b/test/CodeGen/Mips/null-streamer.ll
@@ -0,0 +1,7 @@
+; Test the null streamer with a terget streamer.
+; RUN: llc -O0 -filetype=null -mtriple=mips-linux < %s
+
+define i32 @main()  {
+entry:
+  ret i32 0
+}
diff --git a/test/CodeGen/Mips/null.ll b/test/CodeGen/Mips/null.ll
index 00c66a9928f62..bc78a27f199c8 100644
--- a/test/CodeGen/Mips/null.ll
+++ b/test/CodeGen/Mips/null.ll
@@ -5,7 +5,7 @@ define i32 @main() nounwind {
 entry:
   ret i32 0
 
-; 16: 	.set	mips16                  # @main
+; 16: 	.set	mips16
 
 
 ; 16:	jrc	$ra
diff --git a/test/CodeGen/Mips/o32_cc.ll b/test/CodeGen/Mips/o32_cc.ll
index 08e5aab4f7ac4..c28f9abcadcdd 100644
--- a/test/CodeGen/Mips/o32_cc.ll
+++ b/test/CodeGen/Mips/o32_cc.ll
@@ -1,12 +1,13 @@
-; RUN: llc -march=mipsel < %s | FileCheck %s
-; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s
-; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=FP32EL %s
-; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck -check-prefix=FP64EL %s
+; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=ALL %s
+; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck -check-prefix=ALL %s
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck -check-prefix=ALL -check-prefix=NO-MFHC1 %s
+; RUN: llc -march=mipsel -mcpu=mips32r2              < %s | FileCheck -check-prefix=ALL -check-prefix=HAS-MFHC1 %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=HAS-MFHC1 %s
 
 ; $f12, $f14
-; CHECK-LABEL: testlowercall0:
-; CHECK-DAG: ldc1 $f12, %lo
-; CHECK-DAG: ldc1 $f14, %lo
+; ALL-LABEL: testlowercall0:
+; ALL-DAG:       ldc1 $f12, %lo
+; ALL-DAG:       ldc1 $f14, %lo
 define void @testlowercall0() nounwind {
 entry:
   tail call void @f0(double 5.000000e+00, double 6.000000e+00) nounwind
@@ -16,9 +17,9 @@ entry:
 declare void @f0(double, double)
 
 ; $f12, $f14
-; CHECK-LABEL: testlowercall1:
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: lwc1 $f14, %lo
+; ALL-LABEL: testlowercall1:
+; ALL-DAG:       lwc1 $f12, %lo
+; ALL-DAG:       lwc1 $f14, %lo
 define void @testlowercall1() nounwind {
 entry:
   tail call void @f1(float 8.000000e+00, float 9.000000e+00) nounwind
@@ -28,9 +29,9 @@ entry:
 declare void @f1(float, float)
 
 ; $f12, $f14
-; CHECK-LABEL: testlowercall2:
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: ldc1 $f14, %lo
+; ALL-LABEL: testlowercall2:
+; ALL-DAG:       lwc1 $f12, %lo
+; ALL-DAG:       ldc1 $f14, %lo
 define void @testlowercall2() nounwind {
 entry:
   tail call void @f2(float 8.000000e+00, double 6.000000e+00) nounwind
@@ -40,9 +41,9 @@ entry:
 declare void @f2(float, double)
 
 ; $f12, $f14
-; CHECK-LABEL: testlowercall3:
-; CHECK-DAG: ldc1 $f12, %lo
-; CHECK-DAG: lwc1 $f14, %lo
+; ALL-LABEL: testlowercall3:
+; ALL-DAG:       ldc1 $f12, %lo
+; ALL-DAG:       lwc1 $f14, %lo
 define void @testlowercall3() nounwind {
 entry:
   tail call void @f3(double 5.000000e+00, float 9.000000e+00) nounwind
@@ -52,11 +53,11 @@ entry:
 declare void @f3(double, float)
 
 ; $4, $5, $6, $7
-; CHECK-LABEL: testlowercall4:
-; CHECK-DAG: addiu $4, $zero, 12
-; CHECK-DAG: addiu $5, $zero, 13
-; CHECK-DAG: addiu $6, $zero, 14
-; CHECK-DAG: addiu $7, $zero, 15
+; ALL-LABEL: testlowercall4:
+; ALL-DAG:       addiu $4, $zero, 12
+; ALL-DAG:       addiu $5, $zero, 13
+; ALL-DAG:       addiu $6, $zero, 14
+; ALL-DAG:       addiu $7, $zero, 15
 define void @testlowercall4() nounwind {
 entry:
   tail call void @f4(i32 12, i32 13, i32 14, i32 15) nounwind
@@ -66,11 +67,11 @@ entry:
 declare void @f4(i32, i32, i32, i32)
 
 ; $f12, $6, stack
-; CHECK-LABEL: testlowercall5:
-; CHECK-DAG: ldc1 $f12, %lo
-; CHECK-DAG: addiu $6, $zero, 23
-; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp)
-; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp)
+; ALL-LABEL: testlowercall5:
+; ALL-DAG:       ldc1 $f12, %lo
+; ALL-DAG:       addiu $6, $zero, 23
+; ALL-DAG:       sw ${{[a-z0-9]+}}, 16($sp)
+; ALL-DAG:       sw ${{[a-z0-9]+}}, 20($sp)
 define void @testlowercall5() nounwind {
 entry:
   tail call void @f5(double 1.500000e+01, i32 23, double 1.700000e+01) nounwind
@@ -80,10 +81,10 @@ entry:
 declare void @f5(double, i32, double)
 
 ; $f12, $6, $7
-; CHECK-LABEL: testlowercall6:
-; CHECK-DAG: ldc1 $f12, %lo
-; CHECK-DAG: addiu $6, $zero, 33
-; CHECK-DAG: addiu $7, $zero, 24
+; ALL-LABEL: testlowercall6:
+; ALL-DAG:       ldc1 $f12, %lo
+; ALL-DAG:       addiu $6, $zero, 33
+; ALL-DAG:       addiu $7, $zero, 24
 define void @testlowercall6() nounwind {
 entry:
   tail call void @f6(double 2.500000e+01, i32 33, i32 24) nounwind
@@ -93,10 +94,10 @@ entry:
 declare void @f6(double, i32, i32)
 
 ; $f12, $5, $6
-; CHECK-LABEL: testlowercall7:
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: addiu $5, $zero, 43
-; CHECK-DAG: addiu $6, $zero, 34
+; ALL-LABEL: testlowercall7:
+; ALL-DAG:       lwc1 $f12, %lo
+; ALL-DAG:       addiu $5, $zero, 43
+; ALL-DAG:       addiu $6, $zero, 34
 define void @testlowercall7() nounwind {
 entry:
   tail call void @f7(float 1.800000e+01, i32 43, i32 34) nounwind
@@ -106,12 +107,12 @@ entry:
 declare void @f7(float, i32, i32)
 
 ; $4, $5, $6, stack
-; CHECK-LABEL: testlowercall8:
-; CHECK-DAG: addiu $4, $zero, 22
-; CHECK-DAG: addiu $5, $zero, 53
-; CHECK-DAG: addiu $6, $zero, 44
-; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp)
-; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp)
+; ALL-LABEL: testlowercall8:
+; ALL-DAG:       addiu $4, $zero, 22
+; ALL-DAG:       addiu $5, $zero, 53
+; ALL-DAG:       addiu $6, $zero, 44
+; ALL-DAG:       sw ${{[a-z0-9]+}}, 16($sp)
+; ALL-DAG:       sw ${{[a-z0-9]+}}, 20($sp)
 define void @testlowercall8() nounwind {
 entry:
   tail call void @f8(i32 22, i32 53, i32 44, double 4.000000e+00) nounwind
@@ -121,11 +122,11 @@ entry:
 declare void @f8(i32, i32, i32, double)
 
 ; $4, $5, $6, $7
-; CHECK-LABEL: testlowercall9:
-; CHECK-DAG: addiu $4, $zero, 32
-; CHECK-DAG: addiu $5, $zero, 63
-; CHECK-DAG: addiu $6, $zero, 54
-; CHECK-DAG: lui $7, 16688
+; ALL-LABEL: testlowercall9:
+; ALL-DAG:       addiu $4, $zero, 32
+; ALL-DAG:       addiu $5, $zero, 63
+; ALL-DAG:       addiu $6, $zero, 54
+; ALL-DAG:       lui $7, 16688
 define void @testlowercall9() nounwind {
 entry:
   tail call void @f9(i32 32, i32 63, i32 54, float 1.100000e+01) nounwind
@@ -135,15 +136,16 @@ entry:
 declare void @f9(i32, i32, i32, float)
 
 ; $4, $5, ($6, $7)
-; CHECK-LABEL: testlowercall10:
-; CHECK-DAG: addiu $4, $zero, 42
-; CHECK-DAG: addiu $5, $zero, 73
-; FP32EL-LABEL: testlowercall10:
-; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
-; FP64EL-LABEL: testlowercall10:
-; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
+; ALL-LABEL: testlowercall10:
+
+; ALL-DAG:       addiu $4, $zero, 42
+; ALL-DAG:       addiu $5, $zero, 73
+
+; NO-MFHC1-DAG:  mfc1 $6, $f{{[0-9]+}}
+; NO-MFHC1-DAG:  mfc1 $7, $f{{[0-9]+}}
+
+; HAS-MFHC1-DAG: mfc1 $6, $f{{[0-9]+}}
+; HAS-MFHC1-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall10() nounwind {
 entry:
   tail call void @f10(i32 42, i32 73, double 2.700000e+01) nounwind
@@ -153,14 +155,14 @@ entry:
 declare void @f10(i32, i32, double)
 
 ; $4, ($6, $7)
-; CHECK-LABEL: testlowercall11:
-; CHECK-DAG: addiu $4, $zero, 52
-; FP32EL-LABEL: testlowercall11:
-; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
-; FP64EL-LABEL: testlowercall11:
-; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
+; ALL-LABEL: testlowercall11:
+; ALL-DAG:       addiu $4, $zero, 52
+
+; NO-MFHC1-DAG:  mfc1 $6, $f{{[0-9]+}}
+; NO-MFHC1-DAG:  mfc1 $7, $f{{[0-9]+}}
+
+; HAS-MFHC1-DAG: mfc1 $6, $f{{[0-9]+}}
+; HAS-MFHC1-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall11() nounwind {
 entry:
   tail call void @f11(i32 52, double 1.600000e+01) nounwind
@@ -170,11 +172,11 @@ entry:
 declare void @f11(i32, double)
 
 ; $f12, $f14, $6, $7
-; CHECK-LABEL: testlowercall12:
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: lwc1 $f14, %lo
-; CHECK-DAG: lui $6, 16672
-; CHECK-DAG: lui $7, 16808
+; ALL-LABEL: testlowercall12:
+; ALL-DAG:       lwc1 $f12, %lo
+; ALL-DAG:       lwc1 $f14, %lo
+; ALL-DAG:       lui $6, 16672
+; ALL-DAG:       lui $7, 16808
 define void @testlowercall12() nounwind {
 entry:
   tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind
@@ -184,11 +186,11 @@ entry:
 declare void @f12(float, float, float, float)
 
 ; $f12, $5, $6, $7
-; CHECK-LABEL: testlowercall13:
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: addiu $5, $zero, 83
-; CHECK-DAG: lui $6, 16800
-; CHECK-DAG: addiu $7, $zero, 25
+; ALL-LABEL: testlowercall13:
+; ALL-DAG:       lwc1 $f12, %lo
+; ALL-DAG:       addiu $5, $zero, 83
+; ALL-DAG:       lui $6, 16800
+; ALL-DAG:       addiu $7, $zero, 25
 define void @testlowercall13() nounwind {
 entry:
   tail call void @f13(float 3.800000e+01, i32 83, float 2.000000e+01, i32 25) nounwind
@@ -199,10 +201,10 @@ entry:
 declare void @f13(float, i32, float, i32)
 
 ; $f12, $f14, $7
-; CHECK-LABEL: testlowercall14:
-; CHECK-DAG: ldc1 $f12, %lo
-; CHECK-DAG: lwc1 $f14, %lo
-; CHECK-DAG: lui $7, 16880
+; ALL-LABEL: testlowercall14:
+; ALL-DAG:       ldc1 $f12, %lo
+; ALL-DAG:       lwc1 $f14, %lo
+; ALL-DAG:       lui $7, 16880
 define void @testlowercall14() nounwind {
 entry:
   tail call void @f14(double 3.500000e+01, float 2.900000e+01, float 3.000000e+01) nounwind
@@ -212,15 +214,15 @@ entry:
 declare void @f14(double, float, float)
 
 ; $f12, $f14, ($6, $7)
-; CHECK-LABEL: testlowercall15:
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: lwc1 $f14, %lo
-; FP32EL-LABEL: testlowercall15:
-; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
-; FP64EL-LABEL: testlowercall15:
-; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
+; ALL-LABEL: testlowercall15:
+; ALL-DAG:       lwc1 $f12, %lo
+; ALL-DAG:       lwc1 $f14, %lo
+
+; NO-MFHC1-DAG:  mfc1 $6, $f{{[0-9]+}}
+; NO-MFHC1-DAG:  mfc1 $7, $f{{[0-9]+}}
+
+; HAS-MFHC1-DAG: mfc1 $6, $f{{[0-9]+}}
+; HAS-MFHC1-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall15() nounwind {
 entry:
   tail call void @f15(float 4.800000e+01, float 3.900000e+01, double 3.700000e+01) nounwind
@@ -230,11 +232,11 @@ entry:
 declare void @f15(float, float, double)
 
 ; $4, $5, $6, $7
-; CHECK-LABEL: testlowercall16:
-; CHECK-DAG: addiu $4, $zero, 62
-; CHECK-DAG: lui $5, 16964
-; CHECK-DAG: addiu $6, $zero, 64
-; CHECK-DAG: lui $7, 16888
+; ALL-LABEL: testlowercall16:
+; ALL-DAG:       addiu $4, $zero, 62
+; ALL-DAG:       lui $5, 16964
+; ALL-DAG:       addiu $6, $zero, 64
+; ALL-DAG:       lui $7, 16888
 define void @testlowercall16() nounwind {
 entry:
   tail call void @f16(i32 62, float 4.900000e+01, i32 64, float 3.100000e+01) nounwind
@@ -244,11 +246,11 @@ entry:
 declare void @f16(i32, float, i32, float)
 
 ; $4, $5, $6, $7
-; CHECK-LABEL: testlowercall17:
-; CHECK-DAG: addiu $4, $zero, 72
-; CHECK-DAG: lui $5, 17004
-; CHECK-DAG: addiu $6, $zero, 74
-; CHECK-DAG: addiu $7, $zero, 35
+; ALL-LABEL: testlowercall17:
+; ALL-DAG:       addiu $4, $zero, 72
+; ALL-DAG:       lui $5, 17004
+; ALL-DAG:       addiu $6, $zero, 74
+; ALL-DAG:       addiu $7, $zero, 35
 define void @testlowercall17() nounwind {
 entry:
   tail call void @f17(i32 72, float 5.900000e+01, i32 74, i32 35) nounwind
@@ -258,11 +260,11 @@ entry:
 declare void @f17(i32, float, i32, i32)
 
 ; $4, $5, $6, $7
-; CHECK-LABEL: testlowercall18:
-; CHECK-DAG: addiu $4, $zero, 82
-; CHECK-DAG: addiu $5, $zero, 93
-; CHECK-DAG: lui $6, 16928
-; CHECK-DAG: addiu $7, $zero, 45
+; ALL-LABEL: testlowercall18:
+; ALL-DAG:       addiu $4, $zero, 82
+; ALL-DAG:       addiu $5, $zero, 93
+; ALL-DAG:       lui $6, 16928
+; ALL-DAG:       addiu $7, $zero, 45
 define void @testlowercall18() nounwind {
 entry:
   tail call void @f18(i32 82, i32 93, float 4.000000e+01, i32 45) nounwind
@@ -273,16 +275,16 @@ declare void @f18(i32, i32, float, i32)
 
 
 ; $4, ($6, $7), stack
-; CHECK-LABEL: testlowercall20:
-; CHECK-DAG: addiu $4, $zero, 92
-; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp)
-; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp)
-; FP32EL-LABEL: testlowercall20:
-; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
-; FP64EL-LABEL: testlowercall20:
-; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
+; ALL-LABEL: testlowercall20:
+; ALL-DAG:       addiu $4, $zero, 92
+; ALL-DAG:       sw ${{[a-z0-9]+}}, 16($sp)
+; ALL-DAG:       sw ${{[a-z0-9]+}}, 20($sp)
+
+; NO-MFHC1-DAG:  mfc1 $6, $f{{[0-9]+}}
+; NO-MFHC1-DAG:  mfc1 $7, $f{{[0-9]+}}
+
+; HAS-MFHC1-DAG: mfc1 $6, $f{{[0-9]+}}
+; HAS-MFHC1-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall20() nounwind {
 entry:
   tail call void @f20(i32 92, double 2.600000e+01, double 4.700000e+01) nounwind
@@ -292,9 +294,9 @@ entry:
 declare void @f20(i32, double, double)
 
 ; $f12, $5
-; CHECK-LABEL: testlowercall21:
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: addiu $5, $zero, 103
+; ALL-LABEL: testlowercall21:
+; ALL-DAG:       lwc1 $f12, %lo
+; ALL-DAG:       addiu $5, $zero, 103
 define void @testlowercall21() nounwind {
 entry:
   tail call void @f21(float 5.800000e+01, i32 103) nounwind
@@ -304,15 +306,15 @@ entry:
 declare void @f21(float, i32)
 
 ; $f12, $5, ($6, $7)
-; CHECK-LABEL: testlowercall22:
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: addiu $5, $zero, 113
-; FP32EL-LABEL: testlowercall22:
-; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
-; FP64EL-LABEL: testlowercall22:
-; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
-; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
+; ALL-LABEL: testlowercall22:
+; ALL-DAG:       lwc1 $f12, %lo
+; ALL-DAG:       addiu $5, $zero, 113
+
+; NO-MFHC1-DAG:  mfc1 $6, $f{{[0-9]+}}
+; NO-MFHC1-DAG:  mfc1 $7, $f{{[0-9]+}}
+
+; HAS-MFHC1-DAG: mfc1 $6, $f{{[0-9]+}}
+; HAS-MFHC1-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall22() nounwind {
 entry:
   tail call void @f22(float 6.800000e+01, i32 113, double 5.700000e+01) nounwind
@@ -322,9 +324,9 @@ entry:
 declare void @f22(float, i32, double)
 
 ; $f12, f6
-; CHECK-LABEL: testlowercall23:
-; CHECK-DAG: ldc1 $f12, %lo
-; CHECK-DAG: addiu $6, $zero, 123
+; ALL-LABEL: testlowercall23:
+; ALL-DAG:       ldc1 $f12, %lo
+; ALL-DAG:       addiu $6, $zero, 123
 define void @testlowercall23() nounwind {
 entry:
   tail call void @f23(double 4.500000e+01, i32 123) nounwind
@@ -334,11 +336,11 @@ entry:
 declare void @f23(double, i32)
 
 ; $f12,$6, stack
-; CHECK-LABEL: testlowercall24:
-; CHECK-DAG: ldc1 $f12, %lo
-; CHECK-DAG: addiu $6, $zero, 133
-; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp)
-; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp)
+; ALL-LABEL: testlowercall24:
+; ALL-DAG:       ldc1 $f12, %lo
+; ALL-DAG:       addiu $6, $zero, 133
+; ALL-DAG:       sw ${{[a-z0-9]+}}, 16($sp)
+; ALL-DAG:       sw ${{[a-z0-9]+}}, 20($sp)
 define void @testlowercall24() nounwind {
 entry:
   tail call void @f24(double 5.500000e+01, i32 133, double 6.700000e+01) nounwind
@@ -347,19 +349,19 @@ entry:
 
 declare void @f24(double, i32, double)
 
-; CHECK-LABEL: testlowercall25:
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: lwc1 $f14, %lo
-; CHECK-DAG: lui $6
-; CHECK-DAG: lui $7
-; CHECK-DAG: lwc1 $f12, %lo
-; CHECK-DAG: addiu $5, $zero, 83
-; CHECK-DAG: lui $6
-; CHECK-DAG: addiu $7, $zero, 25
-; CHECK-DAG: addiu $4, $zero, 82
-; CHECK-DAG: addiu $5, $zero, 93
-; CHECK-DAG: lui $6
-; CHECK-DAG: addiu $7, $zero, 45
+; ALL-LABEL: testlowercall25:
+; ALL-DAG:      lwc1 $f12, %lo
+; ALL-DAG:      lwc1 $f14, %lo
+; ALL-DAG:      lui $6
+; ALL-DAG:      lui $7
+; ALL-DAG:      lwc1 $f12, %lo
+; ALL-DAG:      addiu $5, $zero, 83
+; ALL-DAG:      lui $6
+; ALL-DAG:      addiu $7, $zero, 25
+; ALL-DAG:      addiu $4, $zero, 82
+; ALL-DAG:      addiu $5, $zero, 93
+; ALL-DAG:      lui $6
+; ALL-DAG:      addiu $7, $zero, 45
 define void @testlowercall25() nounwind {
 entry:
   tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind
diff --git a/test/CodeGen/Mips/octeon.ll b/test/CodeGen/Mips/octeon.ll
new file mode 100644
index 0000000000000..d5ff9bdf36087
--- /dev/null
+++ b/test/CodeGen/Mips/octeon.ll
@@ -0,0 +1,29 @@
+; RUN: llc -O1 < %s -march=mips64 -mcpu=octeon | FileCheck %s -check-prefix=OCTEON
+; RUN: llc -O1 < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=MIPS64
+
+define i64 @addi64(i64 %a, i64 %b) nounwind {
+entry:
+; OCTEON-LABEL: addi64:
+; OCTEON: jr      $ra
+; OCTEON: baddu   $2, $4, $5
+; MIPS64-LABEL: addi64:
+; MIPS64: daddu
+; MIPS64: jr
+; MIPS64: andi
+  %add = add i64 %a, %b
+  %and = and i64 %add, 255
+  ret i64 %and
+}
+
+define i64 @mul(i64 %a, i64 %b) nounwind {
+entry:
+; OCTEON-LABEL: mul:
+; OCTEON: jr    $ra
+; OCTEON: dmul  $2, $4, $5
+; MIPS64-LABEL: mul:
+; MIPS64: dmult
+; MIPS64: jr
+; MIPS64: mflo
+  %res = mul i64 %a, %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/Mips/octeon_popcnt.ll b/test/CodeGen/Mips/octeon_popcnt.ll
new file mode 100644
index 0000000000000..52c37f69d0206
--- /dev/null
+++ b/test/CodeGen/Mips/octeon_popcnt.ll
@@ -0,0 +1,47 @@
+; RUN: llc -O1 -march=mips64 -mcpu=octeon < %s | FileCheck %s -check-prefix=OCTEON
+; RUN: llc -O1 -march=mips64 -mcpu=mips64 < %s | FileCheck %s -check-prefix=MIPS64
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+  ret i8 %cnt
+; OCTEON-LABEL: cnt8:
+; OCTEON: jr   $ra
+; OCTEON: pop  $2, $1
+; MIPS64-LABEL: cnt8:
+; MIPS64-NOT: pop
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+  ret i16 %cnt
+; OCTEON-LABEL: cnt16:
+; OCTEON: jr   $ra
+; OCTEON: pop  $2, $1
+; MIPS64-LABEL: cnt16:
+; MIPS64-NOT: pop
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+  ret i32 %cnt
+; OCTEON-LABEL: cnt32:
+; OCTEON: jr   $ra
+; OCTEON: pop  $2, $4
+; MIPS64-LABEL: cnt32:
+; MIPS64-NOT: pop
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+  ret i64 %cnt
+; OCTEON-LABEL: cnt64:
+; OCTEON: jr   $ra
+; OCTEON: dpop $2, $4
+; MIPS64-LABEL: cnt64:
+; MIPS64-NOT: dpop
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/Mips/optimize-fp-math.ll b/test/CodeGen/Mips/optimize-fp-math.ll
index 8b71dc42344c1..7886f29f5cef2 100644
--- a/test/CodeGen/Mips/optimize-fp-math.ll
+++ b/test/CodeGen/Mips/optimize-fp-math.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefix=64
 ; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
 
 ; 32-LABEL: test_sqrtf_float_:
diff --git a/test/CodeGen/Mips/optimize-pic-o0.ll b/test/CodeGen/Mips/optimize-pic-o0.ll
new file mode 100644
index 0000000000000..554d49e728c7c
--- /dev/null
+++ b/test/CodeGen/Mips/optimize-pic-o0.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=mipsel -O0 < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define i32 @main()  {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  call void bitcast (void (...)* @foo to void ()*)()
+; CHECK: jalr $25
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %1 = load i32* %i, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %2 = load i32* %retval
+  ret i32 %2
+}
+
+declare void @foo(...) 
diff --git a/test/CodeGen/Mips/powif64_16.ll b/test/CodeGen/Mips/powif64_16.ll
index 35a7ca9201e27..48757276bb8ca 100644
--- a/test/CodeGen/Mips/powif64_16.ll
+++ b/test/CodeGen/Mips/powif64_16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s 
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s
 
 declare float     @llvm.powi.f32(float  %Val, i32 %power)
 declare double    @llvm.powi.f64(double %Val, i32 %power)
diff --git a/test/CodeGen/Mips/prevent-hoisting.ll b/test/CodeGen/Mips/prevent-hoisting.ll
new file mode 100644
index 0000000000000..da665c2109095
--- /dev/null
+++ b/test/CodeGen/Mips/prevent-hoisting.ll
@@ -0,0 +1,144 @@
+; RUN: llc -march=mipsel -O3 < %s | FileCheck %s
+
+
+; MIPS direct branches implicitly define register $at. This test makes sure that
+; code hoisting optimization (which moves identical instructions at the start of
+; two basic blocks to the common predecessor block) takes this into account and
+; doesn't move definition of $at to the predecessor block (which would make $at
+; live-in at the start of successor block).
+
+
+; CHECK-LABEL: readLumaCoeff8x8_CABAC
+
+; The check for "addiu" instruction is added so that we can match the correct "b" instruction.
+; CHECK:           addiu ${{[0-9]+}}, $zero, -1
+; CHECK:           b $[[BB0:BB[0-9_]+]]
+
+; Check that sll instruction that writes to $1 starts basic block.
+; CHECK:       {{BB[0-9_#]+}}: 
+; CHECK-NEXT:      sll $1, $[[R0:[0-9]+]], 4
+
+; Check that identical sll instruction starts another basic block.
+; CHECK:       [[BB0]]:
+; CHECK-NEXT:      sll $1, $[[R0]], 4
+
+
+%struct.img_par = type { i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [16 x [16 x i16]], [6 x [32 x i32]], [16 x [16 x i32]], [4 x [12 x [4 x [4 x i32]]]], [16 x i32], i8**, i32*, i32***, i32**, i32, i32, i32, i32, %struct.Slice*, %struct.macroblock*, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32***, i32***, i32****, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [3 x [2 x i32]], i32, i32, i32, i32, %struct.timeb, %struct.timeb, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.Slice = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.datapartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (%struct.img_par*, %struct.inp_par*)*, i32, i32, i32, i32 }
+%struct.datapartition = type { %struct.Bitstream*, %struct.DecodingEnvironment, i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)* }
+%struct.Bitstream = type { i32, i32, i32, i32, i8*, i32 }
+%struct.DecodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32* }
+%struct.syntaxelement = type { i32, i32, i32, i32, i32, i32, i32, i32, void (i32, i32, i32*, i32*)*, void (%struct.syntaxelement*, %struct.img_par*, %struct.DecodingEnvironment*)* }
+%struct.MotionInfoContexts = type { [4 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] }
+%struct.BiContextType = type { i16, i8 }
+%struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] }
+%struct.inp_par = type { [1000 x i8], [1000 x i8], [1000 x i8], i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.macroblock = type { i32, [2 x i32], i32, i32, %struct.macroblock*, %struct.macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], i32, i64, i64, i32, i32, [4 x i8], [4 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.DecRefPicMarking_s = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s* }
+%struct.timeb = type { i32, i16, i16, i16 }
+
+@assignSE2partition = external global [0 x [20 x i32]]
+@FIELD_SCAN8x8 = external constant [64 x [2 x i8]]
+
+
+define void @readLumaCoeff8x8_CABAC(%struct.img_par* %img, i32 %b8) {
+
+  %1 = load i32* undef, align 4
+  br i1 false, label %2, label %3
+
+; <label>:2                                       ; preds = %0
+  br label %3
+
+; <label>:3                                       ; preds = %2, %0
+  br i1 undef, label %switch.lookup, label %4
+
+switch.lookup:                                    ; preds = %3
+  br label %4
+
+; <label>:4                                       ; preds = %switch.lookup, %3
+  br i1 undef, label %5, label %6
+
+; <label>:5                                       ; preds = %4
+  br label %6
+
+; <label>:6                                       ; preds = %5, %4
+  %7 = phi [2 x i8]* [ getelementptr inbounds ([64 x [2 x i8]]* @FIELD_SCAN8x8, i32 0, i32 0), %4 ], [ null, %5 ]
+  br i1 undef, label %switch.lookup6, label %8
+
+switch.lookup6:                                   ; preds = %6
+  br label %8
+
+; <label>:8                                       ; preds = %switch.lookup6, %6
+  br i1 undef, label %.loopexit, label %9
+
+; <label>:9                                       ; preds = %8
+  %10 = and i32 %b8, 1
+  %11 = shl nuw nsw i32 %10, 3
+  %12 = getelementptr inbounds %struct.Slice* null, i32 0, i32 9
+  br i1 undef, label %.preheader, label %.preheader11
+
+.preheader11:                                     ; preds = %21, %9
+  %k.014 = phi i32 [ %27, %21 ], [ 0, %9 ]
+  %coef_ctr.013 = phi i32 [ %23, %21 ], [ -1, %9 ]
+  br i1 false, label %13, label %14
+
+; <label>:13                                      ; preds = %.preheader11
+  br label %15
+
+; <label>:14                                      ; preds = %.preheader11
+  br label %15
+
+; <label>:15                                      ; preds = %14, %13
+  %16 = getelementptr inbounds [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
+  %17 = load i32* %16, align 4
+  %18 = getelementptr inbounds %struct.datapartition* null, i32 %17, i32 2
+  %19 = load i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)** %18, align 4
+  %20 = call i32 %19(%struct.syntaxelement* undef, %struct.img_par* %img, %struct.datapartition* undef)
+  br i1 false, label %.loopexit, label %21
+
+; <label>:21                                      ; preds = %15
+  %22 = add i32 %coef_ctr.013, 1
+  %23 = add i32 %22, 0
+  %24 = getelementptr inbounds [2 x i8]* %7, i32 %23, i32 0
+  %25 = add nsw i32 0, %11
+  %26 = getelementptr inbounds %struct.img_par* %img, i32 0, i32 27, i32 undef, i32 %25
+  store i32 0, i32* %26, align 4
+  %27 = add nsw i32 %k.014, 1
+  %28 = icmp slt i32 %27, 65
+  br i1 %28, label %.preheader11, label %.loopexit
+
+.preheader:                                       ; preds = %36, %9
+  %k.110 = phi i32 [ %45, %36 ], [ 0, %9 ]
+  %coef_ctr.29 = phi i32 [ %39, %36 ], [ -1, %9 ]
+  br i1 false, label %29, label %30
+
+; <label>:29                                      ; preds = %.preheader
+  br label %31
+
+; <label>:30                                      ; preds = %.preheader
+  br label %31
+
+; <label>:31                                      ; preds = %30, %29
+  %32 = getelementptr inbounds [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
+  %33 = load i32* %32, align 4
+  %34 = getelementptr inbounds %struct.datapartition* null, i32 %33
+  %35 = call i32 undef(%struct.syntaxelement* undef, %struct.img_par* %img, %struct.datapartition* %34)
+  br i1 false, label %.loopexit, label %36
+
+; <label>:36                                      ; preds = %31
+  %37 = load i32* undef, align 4
+  %38 = add i32 %coef_ctr.29, 1
+  %39 = add i32 %38, %37
+  %40 = getelementptr inbounds [2 x i8]* %7, i32 %39, i32 0
+  %41 = load i8* %40, align 1
+  %42 = zext i8 %41 to i32
+  %43 = add nsw i32 %42, %11
+  %44 = getelementptr inbounds %struct.img_par* %img, i32 0, i32 27, i32 undef, i32 %43
+  store i32 0, i32* %44, align 4
+  %45 = add nsw i32 %k.110, 1
+  %46 = icmp slt i32 %45, 65
+  br i1 %46, label %.preheader, label %.loopexit
+
+.loopexit:                                        ; preds = %36, %31, %21, %15, %8
+  ret void
+}
diff --git a/test/CodeGen/Mips/remat-immed-load.ll b/test/CodeGen/Mips/remat-immed-load.ll
index d93964bcaef6f..b53b156e9eecf 100644
--- a/test/CodeGen/Mips/remat-immed-load.ll
+++ b/test/CodeGen/Mips/remat-immed-load.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck %s -check-prefix=64
 ; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=64
 
 define void @f0() nounwind {
diff --git a/test/CodeGen/Mips/rotate.ll b/test/CodeGen/Mips/rotate.ll
index 813bbdf18bbda..70eff6e224d07 100644
--- a/test/CodeGen/Mips/rotate.ll
+++ b/test/CodeGen/Mips/rotate.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float   < %s | FileCheck %s -check-prefix=mips16 
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 < %s | FileCheck %s -check-prefix=mips16
 
 ; CHECK:  rotrv $2, $4
 ; mips16: .ent rot0
diff --git a/test/CodeGen/Mips/s2rem.ll b/test/CodeGen/Mips/s2rem.ll
new file mode 100644
index 0000000000000..9edb5be2771ea
--- /dev/null
+++ b/test/CodeGen/Mips/s2rem.ll
@@ -0,0 +1,92 @@
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic  < %s | FileCheck %s -check-prefix=PIC
+
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static  < %s | FileCheck %s -check-prefix=STATIC
+
+
+@xi = common global i32 0, align 4
+@x = common global float 0.000000e+00, align 4
+@xd = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @it() #0 {
+entry:
+  %call = call i32 @i(i32 1)
+  store i32 %call, i32* @xi, align 4
+  ret void
+; PIC: 	.ent	it
+; STATIC: 	.ent	it
+; PIC: 	save	$16, $17, $ra, [[FS:[0-9]+]]
+; STATIC:      save	$16, $ra, [[FS:[0-9]+]]
+; PIC: 	restore	$16, $17, $ra, [[FS]]
+; STATIC:      restore	$16, $ra, [[FS]]
+; PIC: 	.end	it
+; STATIC: 	.end	it
+}
+
+declare i32 @i(i32) #1
+
+; Function Attrs: nounwind
+define void @ft() #0 {
+entry:
+  %call = call float @f()
+  store float %call, float* @x, align 4
+  ret void
+; PIC: 	.ent	ft
+; PIC: 	save	$16, $17, $ra, $18, [[FS:[0-9]+]]
+; PIC: 	restore	$16, $17, $ra, $18, [[FS]]
+; PIC: 	.end	ft
+}
+
+declare float @f() #1
+
+; Function Attrs: nounwind
+define void @dt() #0 {
+entry:
+  %call = call double @d()
+  store double %call, double* @xd, align 8
+  ret void
+; PIC: 	.ent	dt
+; PIC: 	save	$16, $17, $ra, $18, [[FS:[0-9]+]]
+; PIC: 	restore	$16, $17, $ra, $18, [[FS]]
+; PIC: 	.end	dt
+}
+
+declare double @d() #1
+
+; Function Attrs: nounwind
+define void @fft() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @ff(float %0)
+  store float %call, float* @x, align 4
+  ret void
+; PIC: 	.ent	fft
+; PIC: 	save	$16, $17, $ra, $18, [[FS:[0-9]+]]
+; PIC: 	restore	$16, $17, $ra, $18, [[FS]]
+; PIC: 	.end	fft
+}
+
+declare float @ff(float) #1
+
+; Function Attrs: nounwind
+define void @vft() #0 {
+entry:
+  %0 = load float* @x, align 4
+  call void @vf(float %0)
+  ret void
+; PIC: 	.ent	vft
+; STATIC: 	.ent	vft
+; PIC: 	save	$16, $ra, [[FS:[0-9]+]]
+; STATIC:      save	$16, $ra, [[FS:[0-9]+]]
+; PIC: 	restore	$16, $ra, [[FS]]
+; STATIC:      restore	$16, $ra, [[FS]]
+; PIC: 	.end	vft
+; STATIC: 	.end	vft
+}
+
+declare void @vf(float) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
diff --git a/test/CodeGen/Mips/sel1c.ll b/test/CodeGen/Mips/sel1c.ll
index 4c4784de6aa84..edd2e3e43b797 100644
--- a/test/CodeGen/Mips/sel1c.ll
+++ b/test/CodeGen/Mips/sel1c.ll
@@ -10,7 +10,7 @@ entry:
   %0 = load i32* @i, align 4
   %1 = load i32* @j, align 4
   %cmp = icmp eq i32 %0, %1
-  %cond = select i1 %cmp, i32 1, i32 2
+  %cond = select i1 %cmp, i32 1, i32 3
   store i32 %cond, i32* @k, align 4
   ret void
 ; cond-b-short:	bteqz	$BB0_{{[0-9]+}}  # 16 bit inst
diff --git a/test/CodeGen/Mips/sel2c.ll b/test/CodeGen/Mips/sel2c.ll
index 25dfaa9ba87ea..4b211245f46e9 100644
--- a/test/CodeGen/Mips/sel2c.ll
+++ b/test/CodeGen/Mips/sel2c.ll
@@ -10,7 +10,7 @@ entry:
   %0 = load i32* @i, align 4
   %1 = load i32* @j, align 4
   %cmp = icmp ne i32 %0, %1
-  %cond = select i1 %cmp, i32 1, i32 2
+  %cond = select i1 %cmp, i32 1, i32 3
   store i32 %cond, i32* @k, align 4
 ; cond-b-short:	btnez	$BB0_{{[0-9]+}}  # 16 bit inst
   ret void
diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll
index 06e2a86ad1760..eb2198b36dff3 100644
--- a/test/CodeGen/Mips/select.ll
+++ b/test/CodeGen/Mips/select.ll
@@ -1,135 +1,705 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -march=mipsel   -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=32
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=32R2
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=32R6
+; RUN: llc < %s -march=mips64el -mcpu=mips64   | FileCheck %s -check-prefix=ALL -check-prefix=64
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=64R2
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=64R6
 
 @d2 = external global double
 @d3 = external global double
 
-define i32 @sel1(i32 %s, i32 %f0, i32 %f1) nounwind readnone {
+define i32 @i32_icmp_ne_i32_val(i32 %s, i32 %f0, i32 %f1) nounwind readnone {
 entry:
-; CHECK: movn
+; ALL-LABEL: i32_icmp_ne_i32_val:
+
+; 32:            movn $5, $6, $4
+; 32:            move $2, $5
+
+; 32R2:          movn $5, $6, $4
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $5, $4
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $6, $4
+; 32R6:          or $2, $[[T1]], $[[T0]]
+
+; 64:            movn $5, $6, $4
+; 64:            move $2, $5
+
+; 64R2:          movn $5, $6, $4
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      seleqz $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG:      selnez $[[T1:[0-9]+]], $6, $4
+; 64R6:          or $2, $[[T1]], $[[T0]]
+
   %tobool = icmp ne i32 %s, 0
   %cond = select i1 %tobool, i32 %f1, i32 %f0
   ret i32 %cond
 }
 
-define float @sel2(i32 %s, float %f0, float %f1) nounwind readnone {
+define i64 @i32_icmp_ne_i64_val(i32 %s, i64 %f0, i64 %f1) nounwind readnone {
+entry:
+; ALL-LABEL: i32_icmp_ne_i64_val:
+
+; 32-DAG:        lw $[[F1:[0-9]+]], 16($sp)
+; 32-DAG:        movn $6, $[[F1]], $4
+; 32-DAG:        lw $[[F1H:[0-9]+]], 20($sp)
+; 32:            movn $7, $[[F1H]], $4
+; 32:            move $2, $6
+; 32:            move $3, $7
+
+; 32R2-DAG:      lw $[[F1:[0-9]+]], 16($sp)
+; 32R2-DAG:      movn $6, $[[F1]], $4
+; 32R2-DAG:      lw $[[F1H:[0-9]+]], 20($sp)
+; 32R2:          movn $7, $[[F1H]], $4
+; 32R2:          move $2, $6
+; 32R2:          move $3, $7
+
+; 32R6-DAG:      lw $[[F1:[0-9]+]], 16($sp)
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $6, $4
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $[[F1]], $4
+; 32R6:          or $2, $[[T1]], $[[T0]]
+; 32R6-DAG:      lw $[[F1H:[0-9]+]], 20($sp)
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $7, $4
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $[[F1H]], $4
+; 32R6:          or $3, $[[T1]], $[[T0]]
+
+; 64:            movn $5, $6, $4
+; 64:            move $2, $5
+
+; 64R2:          movn $5, $6, $4
+; 64R2:          move $2, $5
+
+; FIXME: This sll works around an implementation detail in the code generator
+;        (setcc's result is i32 so bits 32-63 are undefined). It's not really
+;        needed.
+; 64R6-DAG:      sll $[[CC:[0-9]+]], $4, 0
+; 64R6-DAG:      seleqz $[[T0:[0-9]+]], $5, $[[CC]]
+; 64R6-DAG:      selnez $[[T1:[0-9]+]], $6, $[[CC]]
+; 64R6:          or $2, $[[T1]], $[[T0]]
+
+  %tobool = icmp ne i32 %s, 0
+  %cond = select i1 %tobool, i64 %f1, i64 %f0
+  ret i64 %cond
+}
+
+define i64 @i64_icmp_ne_i64_val(i64 %s, i64 %f0, i64 %f1) nounwind readnone {
 entry:
-; CHECK: movn.s
+; ALL-LABEL: i64_icmp_ne_i64_val:
+
+; 32-DAG:        or $[[CC:[0-9]+]], $4
+; 32-DAG:        lw $[[F1:[0-9]+]], 16($sp)
+; 32-DAG:        movn $6, $[[F1]], $[[CC]]
+; 32-DAG:        lw $[[F1H:[0-9]+]], 20($sp)
+; 32:            movn $7, $[[F1H]], $[[CC]]
+; 32:            move $2, $6
+; 32:            move $3, $7
+
+; 32R2-DAG:      or $[[CC:[0-9]+]], $4
+; 32R2-DAG:      lw $[[F1:[0-9]+]], 16($sp)
+; 32R2-DAG:      movn $6, $[[F1]], $[[CC]]
+; 32R2-DAG:      lw $[[F1H:[0-9]+]], 20($sp)
+; 32R2:          movn $7, $[[F1H]], $[[CC]]
+; 32R2:          move $2, $6
+; 32R2:          move $3, $7
+
+; 32R6-DAG:      lw $[[F1:[0-9]+]], 16($sp)
+; 32R6-DAG:      or $[[T2:[0-9]+]], $4, $5
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $6, $[[T2]]
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $[[F1]], $[[T2]]
+; 32R6:          or $2, $[[T1]], $[[T0]]
+; 32R6-DAG:      lw $[[F1H:[0-9]+]], 20($sp)
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $7, $[[T2]]
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $[[F1H]], $[[T2]]
+; 32R6:          or $3, $[[T1]], $[[T0]]
+
+; 64:            movn $5, $6, $4
+; 64:            move $2, $5
+
+; 64R2:          movn $5, $6, $4
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      seleqz $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG:      selnez $[[T1:[0-9]+]], $6, $4
+; 64R6:          or $2, $[[T1]], $[[T0]]
+
+  %tobool = icmp ne i64 %s, 0
+  %cond = select i1 %tobool, i64 %f1, i64 %f0
+  ret i64 %cond
+}
+
+define float @i32_icmp_ne_f32_val(i32 %s, float %f0, float %f1) nounwind readnone {
+entry:
+; ALL-LABEL: i32_icmp_ne_f32_val:
+
+; 32-DAG:        mtc1 $5, $[[F0:f[0-9]+]]
+; 32-DAG:        mtc1 $6, $[[F1:f0]]
+; 32:            movn.s $[[F1]], $[[F0]], $4
+
+; 32R2-DAG:      mtc1 $5, $[[F0:f[0-9]+]]
+; 32R2-DAG:      mtc1 $6, $[[F1:f0]]
+; 32R2:          movn.s $[[F1]], $[[F0]], $4
+
+; 32R6-DAG:      mtc1 $5, $[[F0:f[0-9]+]]
+; 32R6-DAG:      mtc1 $6, $[[F1:f[0-9]+]]
+; 32R6:          sltu $[[T0:[0-9]+]], $zero, $4
+; 32R6:          mtc1 $[[T0]], $[[CC:f0]]
+; 32R6:          sel.s $[[CC]], $[[F1]], $[[F0]]
+
+; 64:            movn.s $f14, $f13, $4
+; 64:            mov.s $f0, $f14
+
+; 64R2:          movn.s $f14, $f13, $4
+; 64R2:          mov.s $f0, $f14
+
+; 64R6:          sltu $[[T0:[0-9]+]], $zero, $4
+; 64R6:          mtc1 $[[T0]], $[[CC:f0]]
+; 64R6:          sel.s $[[CC]], $f14, $f13
+
   %tobool = icmp ne i32 %s, 0
   %cond = select i1 %tobool, float %f0, float %f1
   ret float %cond
 }
 
-define double @sel2_1(i32 %s, double %f0, double %f1) nounwind readnone {
+define double @i32_icmp_ne_f64_val(i32 %s, double %f0, double %f1) nounwind readnone {
 entry:
-; CHECK: movn.d
+; ALL-LABEL: i32_icmp_ne_f64_val:
+
+; 32-DAG:        mtc1 $6, $[[F0:f[1-3]*[02468]+]]
+; 32-DAG:        mtc1 $7, $[[F0H:f[1-3]*[13579]+]]
+; 32-DAG:        ldc1 $[[F1:f0]], 16($sp)
+; 32:            movn.d $[[F1]], $[[F0]], $4
+
+; 32R2-DAG:      mtc1 $6, $[[F0:f[0-9]+]]
+; 32R2-DAG:      mthc1 $7, $[[F0]]
+; 32R2-DAG:      ldc1 $[[F1:f0]], 16($sp)
+; 32R2:          movn.d $[[F1]], $[[F0]], $4
+
+; 32R6-DAG:      mtc1 $6, $[[F0:f[0-9]+]]
+; 32R6-DAG:      mthc1 $7, $[[F0]]
+; 32R6-DAG:      sltu $[[T0:[0-9]+]], $zero, $4
+; 32R6-DAG:      mtc1 $[[T0]], $[[CC:f0]]
+; 32R6-DAG:      ldc1 $[[F1:f[0-9]+]], 16($sp)
+; 32R6:          sel.d $[[CC]], $[[F1]], $[[F0]]
+
+; 64:            movn.d $f14, $f13, $4
+; 64:            mov.d $f0, $f14
+
+; 64R2:          movn.d $f14, $f13, $4
+; 64R2:          mov.d $f0, $f14
+
+; 64R6-DAG:      sltu $[[T0:[0-9]+]], $zero, $4
+; 64R6-DAG:      mtc1 $[[T0]], $[[CC:f0]]
+; 64R6:          sel.d $[[CC]], $f14, $f13
+
   %tobool = icmp ne i32 %s, 0
   %cond = select i1 %tobool, double %f0, double %f1
   ret double %cond
 }
 
-define float @sel3(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
+define float @f32_fcmp_oeq_f32_val(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.eq.s
-; CHECK: movt.s
+; ALL-LABEL: f32_fcmp_oeq_f32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.eq.s $[[F2]], $[[F3]]
+; 32:            movt.s $f14, $f12, $fcc0
+; 32:            mov.s $f0, $f14
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.eq.s $[[F2]], $[[F3]]
+; 32R2:          movt.s $f14, $f12, $fcc0
+; 32R2:          mov.s $f0, $f14
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.eq.s $[[CC:f0]], $[[F2]], $[[F3]]
+; 32R6:          sel.s $[[CC]], $f14, $f12
+
+; 64:            c.eq.s $f14, $f15
+; 64:            movt.s $f13, $f12, $fcc0
+; 64:            mov.s $f0, $f13
+
+; 64R2:          c.eq.s $f14, $f15
+; 64R2:          movt.s $f13, $f12, $fcc0
+; 64R2:          mov.s $f0, $f13
+
+; 64R6:          cmp.eq.s $[[CC:f0]], $f14, $f15
+; 64R6:          sel.s $[[CC]], $f13, $f12
+
   %cmp = fcmp oeq float %f2, %f3
   %cond = select i1 %cmp, float %f0, float %f1
   ret float %cond
 }
 
-define float @sel4(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
+define float @f32_fcmp_olt_f32_val(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.olt.s
-; CHECK: movt.s
+; ALL-LABEL: f32_fcmp_olt_f32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.olt.s $[[F2]], $[[F3]]
+; 32:            movt.s $f14, $f12, $fcc0
+; 32:            mov.s $f0, $f14
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.olt.s $[[F2]], $[[F3]]
+; 32R2:          movt.s $f14, $f12, $fcc0
+; 32R2:          mov.s $f0, $f14
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.lt.s $[[CC:f0]], $[[F2]], $[[F3]]
+; 32R6:          sel.s $[[CC]], $f14, $f12
+
+; 64:            c.olt.s $f14, $f15
+; 64:            movt.s $f13, $f12, $fcc0
+; 64:            mov.s $f0, $f13
+
+; 64R2:          c.olt.s $f14, $f15
+; 64R2:          movt.s $f13, $f12, $fcc0
+; 64R2:          mov.s $f0, $f13
+
+; 64R6:          cmp.lt.s $[[CC:f0]], $f14, $f15
+; 64R6:          sel.s $[[CC]], $f13, $f12
+
   %cmp = fcmp olt float %f2, %f3
   %cond = select i1 %cmp, float %f0, float %f1
   ret float %cond
 }
 
-define float @sel5(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
+define float @f32_fcmp_ogt_f32_val(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.s
-; CHECK: movf.s
+; ALL-LABEL: f32_fcmp_ogt_f32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.ule.s $[[F2]], $[[F3]]
+; 32:            movf.s $f14, $f12, $fcc0
+; 32:            mov.s $f0, $f14
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.ule.s $[[F2]], $[[F3]]
+; 32R2:          movf.s $f14, $f12, $fcc0
+; 32R2:          mov.s $f0, $f14
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]]
+; 32R6:          sel.s $[[CC]], $f14, $f12
+
+; 64:            c.ule.s $f14, $f15
+; 64:            movf.s $f13, $f12, $fcc0
+; 64:            mov.s $f0, $f13
+
+; 64R2:          c.ule.s $f14, $f15
+; 64R2:          movf.s $f13, $f12, $fcc0
+; 64R2:          mov.s $f0, $f13
+
+; 64R6:          cmp.lt.s $[[CC:f0]], $f15, $f14
+; 64R6:          sel.s $[[CC]], $f13, $f12
+
   %cmp = fcmp ogt float %f2, %f3
   %cond = select i1 %cmp, float %f0, float %f1
   ret float %cond
 }
 
-define double @sel5_1(double %f0, double %f1, float %f2, float %f3) nounwind readnone {
+define double @f32_fcmp_ogt_f64_val(double %f0, double %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.s
-; CHECK: movf.d
+; ALL-LABEL: f32_fcmp_ogt_f64_val:
+
+; 32-DAG:        lwc1 $[[F2:f[0-9]+]], 16($sp)
+; 32-DAG:        lwc1 $[[F3:f[0-9]+]], 20($sp)
+; 32:            c.ule.s $[[F2]], $[[F3]]
+; 32:            movf.d $f14, $f12, $fcc0
+; 32:            mov.d $f0, $f14
+
+; 32R2-DAG:      lwc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R2-DAG:      lwc1 $[[F3:f[0-9]+]], 20($sp)
+; 32R2:          c.ule.s $[[F2]], $[[F3]]
+; 32R2:          movf.d $f14, $f12, $fcc0
+; 32R2:          mov.d $f0, $f14
+
+; 32R6-DAG:      lwc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R6-DAG:      lwc1 $[[F3:f[0-9]+]], 20($sp)
+; 32R6:          cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]]
+; 32R6:          sel.d $[[CC]], $f14, $f12
+
+; 64:            c.ule.s $f14, $f15
+; 64:            movf.d $f13, $f12, $fcc0
+; 64:            mov.d $f0, $f13
+
+; 64R2:          c.ule.s $f14, $f15
+; 64R2:          movf.d $f13, $f12, $fcc0
+; 64R2:          mov.d $f0, $f13
+
+; 64R6:          cmp.lt.s $[[CC:f0]], $f15, $f14
+; 64R6:          sel.d $[[CC]], $f13, $f12
+
   %cmp = fcmp ogt float %f2, %f3
   %cond = select i1 %cmp, double %f0, double %f1
   ret double %cond
 }
 
-define double @sel6(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
+define double @f64_fcmp_oeq_f64_val(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
 entry:
-; CHECK: c.eq.d
-; CHECK: movt.d
+; ALL-LABEL: f64_fcmp_oeq_f64_val:
+
+; 32-DAG:        ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32-DAG:        ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32:            c.eq.d $[[F2]], $[[F3]]
+; 32:            movt.d $f14, $f12, $fcc0
+; 32:            mov.d $f0, $f14
+
+; 32R2-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R2-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R2:          c.eq.d $[[F2]], $[[F3]]
+; 32R2:          movt.d $f14, $f12, $fcc0
+; 32R2:          mov.d $f0, $f14
+
+; 32R6-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R6-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R6:          cmp.eq.d $[[CC:f0]], $[[F2]], $[[F3]]
+; 32R6:          sel.d $[[CC]], $f14, $f12
+
+; 64:            c.eq.d $f14, $f15
+; 64:            movt.d $f13, $f12, $fcc0
+; 64:            mov.d $f0, $f13
+
+; 64R2:          c.eq.d $f14, $f15
+; 64R2:          movt.d $f13, $f12, $fcc0
+; 64R2:          mov.d $f0, $f13
+
+; 64R6:          cmp.eq.d $[[CC:f0]], $f14, $f15
+; 64R6:          sel.d $[[CC]], $f13, $f12
+
   %cmp = fcmp oeq double %f2, %f3
   %cond = select i1 %cmp, double %f0, double %f1
   ret double %cond
 }
 
-define double @sel7(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
+define double @f64_fcmp_olt_f64_val(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
 entry:
-; CHECK: c.olt.d
-; CHECK: movt.d
+; ALL-LABEL: f64_fcmp_olt_f64_val:
+
+; 32-DAG:        ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32-DAG:        ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32:            c.olt.d $[[F2]], $[[F3]]
+; 32:            movt.d $f14, $f12, $fcc0
+; 32:            mov.d $f0, $f14
+
+; 32R2-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R2-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R2:          c.olt.d $[[F2]], $[[F3]]
+; 32R2:          movt.d $f14, $f12, $fcc0
+; 32R2:          mov.d $f0, $f14
+
+; 32R6-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R6-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R6:          cmp.lt.d $[[CC:f0]], $[[F2]], $[[F3]]
+; 32R6:          sel.d $[[CC]], $f14, $f12
+
+; 64:            c.olt.d $f14, $f15
+; 64:            movt.d $f13, $f12, $fcc0
+; 64:            mov.d $f0, $f13
+
+; 64R2:          c.olt.d $f14, $f15
+; 64R2:          movt.d $f13, $f12, $fcc0
+; 64R2:          mov.d $f0, $f13
+
+; 64R6:          cmp.lt.d $[[CC:f0]], $f14, $f15
+; 64R6:          sel.d $[[CC]], $f13, $f12
+
   %cmp = fcmp olt double %f2, %f3
   %cond = select i1 %cmp, double %f0, double %f1
   ret double %cond
 }
 
-define double @sel8(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
+define double @f64_fcmp_ogt_f64_val(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.d
-; CHECK: movf.d
+; ALL-LABEL: f64_fcmp_ogt_f64_val:
+
+; 32-DAG:        ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32-DAG:        ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32:            c.ule.d $[[F2]], $[[F3]]
+; 32:            movf.d $f14, $f12, $fcc0
+; 32:            mov.d $f0, $f14
+
+; 32R2-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R2-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R2:          c.ule.d $[[F2]], $[[F3]]
+; 32R2:          movf.d $f14, $f12, $fcc0
+; 32R2:          mov.d $f0, $f14
+
+; 32R6-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R6-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R6:          cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]]
+; 32R6:          sel.d $[[CC]], $f14, $f12
+
+; 64:            c.ule.d $f14, $f15
+; 64:            movf.d $f13, $f12, $fcc0
+; 64:            mov.d $f0, $f13
+
+; 64R2:          c.ule.d $f14, $f15
+; 64R2:          movf.d $f13, $f12, $fcc0
+; 64R2:          mov.d $f0, $f13
+
+; 64R6:          cmp.lt.d $[[CC:f0]], $f15, $f14
+; 64R6:          sel.d $[[CC]], $f13, $f12
+
   %cmp = fcmp ogt double %f2, %f3
   %cond = select i1 %cmp, double %f0, double %f1
   ret double %cond
 }
 
-define float @sel8_1(float %f0, float %f1, double %f2, double %f3) nounwind readnone {
+define float @f64_fcmp_ogt_f32_val(float %f0, float %f1, double %f2, double %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.d
-; CHECK: movf.s
+; ALL-LABEL: f64_fcmp_ogt_f32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[1-3]*[02468]+]]
+; 32-DAG:        mtc1 $7, $[[F2H:f[1-3]*[13579]+]]
+; 32-DAG:        ldc1 $[[F3:f[0-9]+]], 16($sp)
+; 32:            c.ule.d $[[F2]], $[[F3]]
+; 32:            movf.s $f14, $f12, $fcc0
+; 32:            mov.s $f0, $f14
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mthc1 $7, $[[F2]]
+; 32R2-DAG:      ldc1 $[[F3:f[0-9]+]], 16($sp)
+; 32R2:          c.ule.d $[[F2]], $[[F3]]
+; 32R2:          movf.s $f14, $f12, $fcc0
+; 32R2:          mov.s $f0, $f14
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mthc1 $7, $[[F2]]
+; 32R6-DAG:      ldc1 $[[F3:f[0-9]+]], 16($sp)
+; 32R6:          cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]]
+; 32R6:          sel.s $[[CC]], $f14, $f12
+
+; 64:            c.ule.d $f14, $f15
+; 64:            movf.s $f13, $f12, $fcc0
+; 64:            mov.s $f0, $f13
+
+; 64R2:          c.ule.d $f14, $f15
+; 64R2:          movf.s $f13, $f12, $fcc0
+; 64R2:          mov.s $f0, $f13
+
+; 64R6:          cmp.lt.d $[[CC:f0]], $f15, $f14
+; 64R6:          sel.s $[[CC]], $f13, $f12
+
   %cmp = fcmp ogt double %f2, %f3
   %cond = select i1 %cmp, float %f0, float %f1
   ret float %cond
 }
 
-define i32 @sel9(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_oeq_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.eq.s
-; CHECK: movt
+; ALL-LABEL: f32_fcmp_oeq_i32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.eq.s $[[F2]], $[[F3]]
+; 32:            movt $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.eq.s $[[F2]], $[[F3]]
+; 32R2:          movt $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.eq.s $[[CC:f[0-9]+]], $[[F2]], $[[F3]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64:            c.eq.s $f14, $f15
+; 64:            movt $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2:          c.eq.s $f14, $f15
+; 64R2:          movt $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6:          cmp.eq.s $[[CC:f[0-9]+]], $f14, $f15
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %cmp = fcmp oeq float %f2, %f3
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
 }
 
-define i32 @sel10(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_olt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.olt.s
-; CHECK: movt
+; ALL-LABEL: f32_fcmp_olt_i32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.olt.s $[[F2]], $[[F3]]
+; 32:            movt $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.olt.s $[[F2]], $[[F3]]
+; 32R2:          movt $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.lt.s $[[CC:f[0-9]+]], $[[F2]], $[[F3]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64:            c.olt.s $f14, $f15
+; 64:            movt $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2:          c.olt.s $f14, $f15
+; 64R2:          movt $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6:          cmp.lt.s $[[CC:f[0-9]+]], $f14, $f15
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
   %cmp = fcmp olt float %f2, %f3
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
 }
 
-define i32 @sel11(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_ogt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.s
-; CHECK: movf
+; ALL-LABEL: f32_fcmp_ogt_i32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.ule.s $[[F2]], $[[F3]]
+; 32:            movf $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.ule.s $[[F2]], $[[F3]]
+; 32R2:          movf $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.lt.s $[[CC:f[0-9]+]], $[[F3]], $[[F2]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64:            c.ule.s $f14, $f15
+; 64:            movf $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2:          c.ule.s $f14, $f15
+; 64R2:          movf $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6:          cmp.lt.s $[[CC:f[0-9]+]], $f15, $f14
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %cmp = fcmp ogt float %f2, %f3
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
 }
 
-define i32 @sel12(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_oeq_i32_val(i32 %f0, i32 %f1) nounwind readonly {
 entry:
-; CHECK: c.eq.d
-; CHECK: movt
+; ALL-LABEL: f64_fcmp_oeq_i32_val:
+
+; 32-DAG:        addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32-DAG:        addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32-DAG:        lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32-DAG:        lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32:            c.eq.d $[[TMP]], $[[TMP1]]
+; 32:            movt $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R2-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R2-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R2-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R2:          c.eq.d $[[TMP]], $[[TMP1]]
+; 32R2:          movt $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R6-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R6-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R6-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R6:          cmp.eq.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64-DAG:        daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_oeq_i32_val)))
+; 64-DAG:        daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64-DAG:        ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64-DAG:        ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64:            c.eq.d $[[TMP]], $[[TMP1]]
+; 64:            movt $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_oeq_i32_val)))
+; 64R2-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R2-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R2-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R2:          c.eq.d $[[TMP]], $[[TMP1]]
+; 64R2:          movt $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_oeq_i32_val)))
+; 64R6-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R6-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R6-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R6:          cmp.eq.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]]
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %tmp = load double* @d2, align 8
   %tmp1 = load double* @d3, align 8
   %cmp = fcmp oeq double %tmp, %tmp1
@@ -137,10 +707,76 @@ entry:
   ret i32 %cond
 }
 
-define i32 @sel13(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_olt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
 entry:
-; CHECK: c.olt.d
-; CHECK: movt
+; ALL-LABEL: f64_fcmp_olt_i32_val:
+
+; 32-DAG:        addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32-DAG:        addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32-DAG:        lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32-DAG:        lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32:            c.olt.d $[[TMP]], $[[TMP1]]
+; 32:            movt $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R2-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R2-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R2-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R2:          c.olt.d $[[TMP]], $[[TMP1]]
+; 32R2:          movt $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R6-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R6-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R6-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R6:          cmp.lt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64-DAG:        daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_olt_i32_val)))
+; 64-DAG:        daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64-DAG:        ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64-DAG:        ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64:            c.olt.d $[[TMP]], $[[TMP1]]
+; 64:            movt $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_olt_i32_val)))
+; 64R2-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R2-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R2-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R2:          c.olt.d $[[TMP]], $[[TMP1]]
+; 64R2:          movt $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_olt_i32_val)))
+; 64R6-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R6-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R6-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R6:          cmp.lt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]]
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %tmp = load double* @d2, align 8
   %tmp1 = load double* @d3, align 8
   %cmp = fcmp olt double %tmp, %tmp1
@@ -148,10 +784,76 @@ entry:
   ret i32 %cond
 }
 
-define i32 @sel14(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_ogt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
 entry:
-; CHECK: c.ule.d
-; CHECK: movf
+; ALL-LABEL: f64_fcmp_ogt_i32_val:
+
+; 32-DAG:        addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32-DAG:        addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32-DAG:        lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32-DAG:        lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32:            c.ule.d $[[TMP]], $[[TMP1]]
+; 32:            movf $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R2-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R2-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R2-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R2:          c.ule.d $[[TMP]], $[[TMP1]]
+; 32R2:          movf $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R6-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R6-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R6-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R6:          cmp.lt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64-DAG:        daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_ogt_i32_val)))
+; 64-DAG:        daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64-DAG:        ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64-DAG:        ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64:            c.ule.d $[[TMP]], $[[TMP1]]
+; 64:            movf $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_ogt_i32_val)))
+; 64R2-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R2-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R2-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R2:          c.ule.d $[[TMP]], $[[TMP1]]
+; 64R2:          movf $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_ogt_i32_val)))
+; 64R6-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R6-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R6-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R6:          cmp.lt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]]
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %tmp = load double* @d2, align 8
   %tmp1 = load double* @d3, align 8
   %cmp = fcmp ogt double %tmp, %tmp1
diff --git a/test/CodeGen/Mips/selectcc.ll b/test/CodeGen/Mips/selectcc.ll
index aeef60ecb806e..9790a0a3e4110 100644
--- a/test/CodeGen/Mips/selectcc.ll
+++ b/test/CodeGen/Mips/selectcc.ll
@@ -1,5 +1,7 @@
-; RUN: llc -march=mipsel < %s
-; RUN: llc -march=mipsel -pre-RA-sched=source < %s | FileCheck %s --check-prefix=SOURCE-SCHED
+; RUN: llc -march=mipsel -mcpu=mips32 < %s
+; RUN: llc -march=mipsel -mcpu=mips32 -pre-RA-sched=source < %s | FileCheck %s --check-prefix=SOURCE-SCHED
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 -pre-RA-sched=source < %s | FileCheck %s --check-prefix=SOURCE-SCHED
 
 @gf0 = external global float
 @gf1 = external global float
@@ -16,13 +18,11 @@ entry:
 ; SOURCE-SCHED: lw
 ; SOURCE-SCHED: lui
 ; SOURCE-SCHED: sw
-; SOURCE-SCHED: addiu
-; SOURCE-SCHED: addiu
-; SOURCE-SCHED: c.olt.s
-; SOURCE-SCHED: movt
+; SOURCE-SCHED: lw
+; SOURCE-SCHED: lwc1
 ; SOURCE-SCHED: mtc1
+; SOURCE-SCHED: c.olt.s
 ; SOURCE-SCHED: jr
-
   store float 0.000000e+00, float* @gf0, align 4
   store float 1.000000e+00, float* @gf1, align 4
   %cmp = fcmp olt float %a, %b
diff --git a/test/CodeGen/Mips/sint-fp-store_pattern.ll b/test/CodeGen/Mips/sint-fp-store_pattern.ll
index c44ea080a886b..2735d787432d4 100644
--- a/test/CodeGen/Mips/sint-fp-store_pattern.ll
+++ b/test/CodeGen/Mips/sint-fp-store_pattern.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefix=64
 ; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
 
 @gint_ = external global i32
diff --git a/test/CodeGen/Mips/sr1.ll b/test/CodeGen/Mips/sr1.ll
new file mode 100644
index 0000000000000..610693d58b3ff
--- /dev/null
+++ b/test/CodeGen/Mips/sr1.ll
@@ -0,0 +1,60 @@
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static  < %s | FileCheck %s 
+
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static  < %s | FileCheck %s -check-prefix=NEG
+
+@f = common global float 0.000000e+00, align 4
+
+; Function Attrs: nounwind
+define void @foo1() #0 {
+entry:
+  %c = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8]* %c, i32 0, i32 0
+  call void @x(i8* %arraydecay)
+  %arraydecay1 = getelementptr inbounds [10 x i8]* %c, i32 0, i32 0
+  call void @x(i8* %arraydecay1)
+  ret void
+; CHECK: 	.ent	foo1
+; CHECK: 	save	$16, $17, $ra, [[FS:[0-9]+]]  # 16 bit inst
+; CHECK: 	restore	$16, $17, $ra, [[FS]] # 16 bit inst
+; CHECK: 	.end	foo1
+}
+
+declare void @x(i8*) #1
+
+; Function Attrs: nounwind
+define void @foo2() #0 {
+entry:
+  %c = alloca [150 x i8], align 1
+  %arraydecay = getelementptr inbounds [150 x i8]* %c, i32 0, i32 0
+  call void @x(i8* %arraydecay)
+  %arraydecay1 = getelementptr inbounds [150 x i8]* %c, i32 0, i32 0
+  call void @x(i8* %arraydecay1)
+  ret void
+; CHECK: 	.ent	foo2
+; CHECK: 	save	$16, $17, $ra, [[FS:[0-9]+]] 
+; CHECK: 	restore	$16, $17, $ra, [[FS]] 
+; CHECK: 	.end	foo2
+}
+
+; Function Attrs: nounwind
+define void @foo3() #0 {
+entry:
+  %call = call float @xf()
+  store float %call, float* @f, align 4
+  ret void
+; CHECK: 	.ent	foo3
+; CHECK: 	save	$16, $17, $ra, $18, [[FS:[0-9]+]]
+; CHECK: 	restore	$16, $17, $ra, $18, [[FS]]
+; CHECK: 	.end	foo3
+; NEG: 	.ent	foo3
+; NEG-NOT: 	save	$16, $17, $ra, $18, [[FS:[0-9]+]] # 16 bit inst
+; NEG-NOT: 	restore	$16, $17, $ra, $18, [[FS]] # 16 bit inst
+; NEG: 	.end	foo3
+}
+
+declare float @xf() #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
diff --git a/test/CodeGen/Mips/start-asm-file.ll b/test/CodeGen/Mips/start-asm-file.ll
new file mode 100644
index 0000000000000..88724643166c2
--- /dev/null
+++ b/test/CodeGen/Mips/start-asm-file.ll
@@ -0,0 +1,91 @@
+; Check the emission of directives at the start of an asm file.
+; This test is XFAILED until we fix the emission of '.option pic0' on
+; N32. At the moment we check if subtarget is Mips64 when we should be
+; checking the Subtarget's ABI.
+
+; ### O32 ABI ###
+; RUN: llc -filetype=asm -mtriple mips-unknown-linux -mcpu=mips32 \
+; RUN: -relocation-model=static %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-STATIC-O32 -check-prefix=CHECK-STATIC-O32-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips-unknown-linux -mcpu=mips32 \
+; RUN: -relocation-model=pic %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-PIC-O32 -check-prefix=CHECK-PIC-O32-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips-unknown-linux -mcpu=mips32 \
+; RUN: -relocation-model=static -mattr=+nan2008 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-STATIC-O32 -check-prefix=CHECK-STATIC-O32-N2008 %s
+
+; RUN: llc -filetype=asm -mtriple mips-unknown-linux -mcpu=mips32 \
+; RUN: -relocation-model=pic -mattr=+nan2008 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-PIC-O32 -check-prefix=CHECK-PIC-O32-N2008 %s
+
+; ### N32 ABI ###
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=static -mattr=-n64,+n32 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-STATIC-N32 -check-prefix=CHECK-STATIC-N32-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=pic -mattr=-n64,+n32 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-PIC-N32 -check-prefix=CHECK-PIC-N32-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=static -mattr=-n64,+n32,+nan2008 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-STATIC-N32 -check-prefix=CHECK-STATIC-N32-N2008 %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=pic -mattr=-n64,+n32,+nan2008 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-PIC-N32 -check-prefix=CHECK-PIC-N32-N2008 %s
+
+; ### N64 ABI ###
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=static -mattr=+n64 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-STATIC-N64 -check-prefix=CHECK-STATIC-N64-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=pic -mattr=+n64 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-PIC-N64 -check-prefix=CHECK-PIC-N64-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=static -mattr=+n64,+nan2008 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-STATIC-N64 -check-prefix=CHECK-STATIC-N64-N2008 %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=pic -mattr=+n64,+nan2008 %s -o - | \
+; RUN:   FileCheck -check-prefix=CHECK-PIC-N64 -check-prefix=CHECK-PIC-N64-N2008 %s
+
+; CHECK-STATIC-O32: .abicalls
+; CHECK-STATIC-O32: .option pic0
+; CHECK-STATIC-O32: .section .mdebug.abi32
+; CHECK-STATIC-O32-NLEGACY: .nan legacy
+; CHECK-STATIC-O32-N2008: .nan 2008
+
+; CHECK-PIC-O32: .abicalls
+; CHECK-PIC-O32-NOT: .option pic0
+; CHECK-PIC-O32: .section .mdebug.abi32
+; CHECK-PIC-O32-NLEGACY: .nan legacy
+; CHECK-PIC-O32-N2008: .nan 2008
+
+; CHECK-STATIC-N32: .abicalls
+; CHECK-STATIC-N32: .option pic0
+; CHECK-STATIC-N32: .section .mdebug.abiN32
+; CHECK-STATIC-N32-NLEGACY: .nan legacy
+; CHECK-STATIC-N32-N2008: .nan 2008
+
+; CHECK-PIC-N32: .abicalls
+; CHECK-PIC-N32-NOT: .option pic0
+; CHECK-PIC-N32: .section .mdebug.abiN32
+; CHECK-PIC-N32-NLEGACY: .nan legacy
+; CHECK-PIC-N32-N2008: .nan 2008
+
+; CHECK-STATIC-N64: .abicalls
+; CHECK-STATIC-N64-NOT: .option pic0
+; CHECK-STATIC-N64: .section .mdebug.abi64
+; CHECK-STATIC-N64-NLEGACY: .nan legacy
+; CHECK-STATIC-N64-N2008: .nan 2008
+
+; CHECK-PIC-N64: .abicalls
+; CHECK-PIC-N64-NOT: .option pic0
+; CHECK-PIC-N64: .section .mdebug.abi64
+; CHECK-PIC-N64-NLEGACY: .nan legacy
+; CHECK-PIC-N64-N2008: .nan 2008
diff --git a/test/CodeGen/Mips/tail16.ll b/test/CodeGen/Mips/tail16.ll
new file mode 100644
index 0000000000000..4e62e557478af
--- /dev/null
+++ b/test/CodeGen/Mips/tail16.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic   < %s | FileCheck %s 
+
+; Function Attrs: nounwind optsize
+define float @h()  {
+entry:
+  %call = tail call float bitcast (float (...)* @g to float ()*)() 
+  ret float %call
+; CHECK:	.ent	h
+; CHECK: 	save	$16, $ra, $18, 32
+; CHECK: 	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_0)(${{[0-9]+}})
+; CHECK: 	restore	$16, $ra, $18, 32
+; CHECK: 	.end	h
+}
+
+; Function Attrs: optsize
+declare float @g(...) 
+
+
+
+
diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll
index 3c810542cca31..b61f84e037614 100644
--- a/test/CodeGen/Mips/tls-alias.ll
+++ b/test/CodeGen/Mips/tls-alias.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=mipsel -relocation-model=pic -disable-mips-delay-filler < %s | FileCheck %s
 
 @foo = thread_local global i32 42
-@bar = hidden alias i32* @foo
+@bar = hidden thread_local alias i32* @foo
 
 define i32* @zed() {
 ; CHECK-DAG: __tls_get_addr
-; CHECK-DAG: %tlsgd(bar)
+; CHECK-DAG: %tlsldm(bar)
        ret i32* @bar
 }
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index 23a8f93a9d7cf..b14ad5ba452b3 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=mipsel -disable-mips-delay-filler < %s | \
-; RUN:     FileCheck %s -check-prefix=PIC
+; RUN:     FileCheck %s -check-prefix=PIC -check-prefix=CHECK
 ; RUN: llc -march=mipsel -relocation-model=static -disable-mips-delay-filler < \
-; RUN:     %s | FileCheck %s -check-prefix=STATIC
+; RUN:     %s | FileCheck %s -check-prefix=STATIC -check-prefix=CHECK
 ; RUN: llc -march=mipsel -relocation-model=static -disable-mips-delay-filler \
 ; RUN:     -mips-fix-global-base-reg=false < %s  | \
-; RUN:     FileCheck %s -check-prefix=STATICGP
+; RUN:     FileCheck %s -check-prefix=STATICGP -check-prefix=CHECK
 
 @t1 = thread_local global i32 0, align 4
 
diff --git a/test/CodeGen/Mips/trap1.ll b/test/CodeGen/Mips/trap1.ll
index bfcd7fed30d9b..90755130e7c22 100644
--- a/test/CodeGen/Mips/trap1.ll
+++ b/test/CodeGen/Mips/trap1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
 
 declare void @llvm.trap()
 
diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll
index 19f3af7f344a7..2002b1c60abe3 100644
--- a/test/CodeGen/Mips/unalignedload.ll
+++ b/test/CodeGen/Mips/unalignedload.ll
@@ -1,5 +1,9 @@
-; RUN: llc  < %s -march=mipsel  | FileCheck %s -check-prefix=CHECK-EL
-; RUN: llc  < %s -march=mips    | FileCheck %s -check-prefix=CHECK-EB
+; RUN: llc  < %s -march=mipsel -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32-EL
+; RUN: llc  < %s -march=mips   -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32-EB
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32-EL
+; RUN: llc  < %s -march=mips   -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32-EB
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32R6-EL
+; RUN: llc  < %s -march=mips   -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32R6-EB
 %struct.S2 = type { %struct.S1, %struct.S1 }
 %struct.S1 = type { i8, i8 }
 %struct.S4 = type { [7 x i8] }
@@ -7,21 +11,71 @@
 @s2 = common global %struct.S2 zeroinitializer, align 1
 @s4 = common global %struct.S4 zeroinitializer, align 1
 
-define void @foo1() nounwind {
+define void @bar1() nounwind {
 entry:
-; CHECK-EL-DAG: lbu ${{[0-9]+}}, 2($[[R0:[0-9]+]])
-; CHECK-EL-DAG: lbu ${{[0-9]+}}, 3($[[R0]])
-; CHECK-EL:     jalr
-; CHECK-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[R2:[0-9]+]])
-; CHECK-EL-DAG: lwr $[[R1]], 0($[[R2]])
-
-; CHECK-EB-DAG: lbu ${{[0-9]+}}, 3($[[R0:[0-9]+]])
-; CHECK-EB-DAG: lbu ${{[0-9]+}}, 2($[[R0]])
-; CHECK-EB:     jalr
-; CHECK-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[R2:[0-9]+]])
-; CHECK-EB-DAG: lwr $[[R1]], 3($[[R2]])
+; ALL-LABEL: bar1:
+
+; ALL-DAG:       lw $[[R0:[0-9]+]], %got(s2)(
+
+; MIPS32-EL-DAG: lbu $[[PART1:[0-9]+]], 2($[[R0]])
+; MIPS32-EL-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]])
+; MIPS32-EL-DAG: sll $[[T0:[0-9]+]], $[[PART2]], 8
+; MIPS32-EL-DAG: or  $4, $[[T0]], $[[PART1]]
+
+; MIPS32-EB-DAG: lbu $[[PART1:[0-9]+]], 2($[[R0]])
+; MIPS32-EB-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]])
+; MIPS32-EB-DAG: sll $[[T0:[0-9]+]], $[[PART1]], 8
+; MIPS32-EB-DAG: or  $[[T1:[0-9]+]], $[[T0]], $[[PART2]]
+; MIPS32-EB-DAG: sll $4, $[[T1]], 16
+
+; MIPS32R6-DAG:  lhu $[[PART1:[0-9]+]], 2($[[R0]])
 
   tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
+  ret void
+}
+
+define void @bar2() nounwind {
+entry:
+; ALL-LABEL: bar2:
+
+; ALL-DAG:       lw $[[R2:[0-9]+]], %got(s4)(
+
+; MIPS32-EL-DAG: lwl $[[R1:4]], 3($[[R2]])
+; MIPS32-EL-DAG: lwr $[[R1]], 0($[[R2]])
+; MIPS32-EL-DAG: lbu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32-EL-DAG: lbu $[[T1:[0-9]+]], 5($[[R2]])
+; MIPS32-EL-DAG: lbu $[[T2:[0-9]+]], 6($[[R2]])
+; MIPS32-EL-DAG: sll $[[T3:[0-9]+]], $[[T1]], 8
+; MIPS32-EL-DAG: or  $[[T4:[0-9]+]], $[[T3]], $[[T0]]
+; MIPS32-EL-DAG: sll $[[T5:[0-9]+]], $[[T2]], 16
+; MIPS32-EL-DAG: or  $5, $[[T4]], $[[T5]]
+
+; MIPS32-EB-DAG: lwl $[[R1:4]], 0($[[R2]])
+; MIPS32-EB-DAG: lwr $[[R1]], 3($[[R2]])
+; MIPS32-EB-DAG: lbu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32-EB-DAG: lbu $[[T1:[0-9]+]], 5($[[R2]])
+; MIPS32-EB-DAG: lbu $[[T2:[0-9]+]], 6($[[R2]])
+; MIPS32-EB-DAG: sll $[[T3:[0-9]+]], $[[T0]], 8
+; MIPS32-EB-DAG: or  $[[T4:[0-9]+]], $[[T3]], $[[T1]]
+; MIPS32-EB-DAG: sll $[[T5:[0-9]+]], $[[T4]], 16
+; MIPS32-EB-DAG: sll $[[T6:[0-9]+]], $[[T2]], 8
+; MIPS32-EB-DAG: or  $5, $[[T5]], $[[T6]]
+
+; FIXME: We should be able to do better than this using lhu
+; MIPS32R6-EL-DAG: lw $4, 0($[[R2]])
+; MIPS32R6-EL-DAG: lhu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32R6-EL-DAG: lbu $[[T1:[0-9]+]], 6($[[R2]])
+; MIPS32R6-EL-DAG: sll $[[T2:[0-9]+]], $[[T1]], 16
+; MIPS32R6-EL-DAG: or  $5, $[[T0]], $[[T2]]
+
+; FIXME: We should be able to do better than this using lhu
+; MIPS32R6-EB-DAG: lw $4, 0($[[R2]])
+; MIPS32R6-EB-DAG: lhu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32R6-EB-DAG: lbu $[[T1:[0-9]+]], 6($[[R2]])
+; MIPS32R6-EB-DAG: sll $[[T2:[0-9]+]], $[[T0]], 16
+; MIPS32R6-EB-DAG: sll $[[T3:[0-9]+]], $[[T1]], 8
+; MIPS32R6-EB-DAG: or  $5, $[[T2]], $[[T3]]
+
   tail call void @foo4(%struct.S4* byval @s4) nounwind
   ret void
 }
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
index e0e93e2e76826..a1b6cb0322b1a 100644
--- a/test/CodeGen/Mips/zeroreg.ll
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -1,21 +1,109 @@
-; RUN: llc < %s -march=mipsel | FileCheck %s
+; RUN: llc < %s -march=mipsel -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=32R6
+; RUN: llc < %s -march=mipsel -mcpu=mips4    | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips64   | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=64R6
 
 @g1 = external global i32
 
-define i32 @foo0(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z0(i32 %s) nounwind readonly {
 entry:
-; CHECK:     movn ${{[0-9]+}}, $zero
+; ALL-LABEL: sel_icmp_nez_i32_z0:
+
+; 32-CMOV:       lw $2, 0(${{[0-9]+}})
+; 32-CMOV:       movn $2, $zero, $4
+
+; 32R6:          lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6:          seleqz $2, $[[R0]], $4
+
+; 64-CMOV:       lw $2, 0(${{[0-9]+}})
+; 64-CMOV:       movn $2, $zero, $4
+
+; 64R6:          lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 64R6:          seleqz $2, $[[R0]], $4
+
   %tobool = icmp ne i32 %s, 0
   %0 = load i32* @g1, align 4
   %cond = select i1 %tobool, i32 0, i32 %0
   ret i32 %cond
 }
 
-define i32 @foo1(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z1(i32 %s) nounwind readonly {
 entry:
-; CHECK:     movz ${{[0-9]+}}, $zero
+; ALL-LABEL: sel_icmp_nez_i32_z1:
+
+; 32-CMOV:       lw $2, 0(${{[0-9]+}})
+; 32-CMOV:       movz $2, $zero, $4
+
+; 32R6:          lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6:          selnez $2, $[[R0]], $4
+
+; 64-CMOV:       lw $2, 0(${{[0-9]+}})
+; 64-CMOV:       movz $2, $zero, $4
+
+; 64R6:          lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 64R6:          selnez $2, $[[R0]], $4
+
   %tobool = icmp ne i32 %s, 0
   %0 = load i32* @g1, align 4
   %cond = select i1 %tobool, i32 %0, i32 0
   ret i32 %cond
 }
+
+@g2 = external global i64
+
+define i64 @sel_icmp_nez_i64_z0(i64 %s) nounwind readonly {
+entry:
+; ALL-LABEL: sel_icmp_nez_i64_z0:
+
+; 32-CMOV-DAG:   lw $[[R0:2]], 0(${{[0-9]+}})
+; 32-CMOV-DAG:   lw $[[R1:3]], 4(${{[0-9]+}})
+; 32-CMOV-DAG:   movn $[[R0]], $zero, $4
+; 32-CMOV-DAG:   movn $[[R1]], $zero, $4
+
+; 32R6-DAG:      lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-DAG:      lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-DAG:      or $[[CC:[0-9]+]], $4, $5
+; 32R6-DAG:      seleqz $2, $[[R0]], $[[CC]]
+; 32R6-DAG:      seleqz $3, $[[R1]], $[[CC]]
+
+; 64-CMOV:       ld $2, 0(${{[0-9]+}})
+; 64-CMOV:       movn $2, $zero, $4
+
+; 64R6:          ld $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 64R6:          seleqz $2, $[[R0]], $4
+
+  %tobool = icmp ne i64 %s, 0
+  %0 = load i64* @g2, align 4
+  %cond = select i1 %tobool, i64 0, i64 %0
+  ret i64 %cond
+}
+
+define i64 @sel_icmp_nez_i64_z1(i64 %s) nounwind readonly {
+entry:
+; ALL-LABEL: sel_icmp_nez_i64_z1:
+
+; 32-CMOV-DAG:   lw $[[R0:2]], 0(${{[0-9]+}})
+; 32-CMOV-DAG:   lw $[[R1:3]], 4(${{[0-9]+}})
+; 32-CMOV-DAG:   movz $[[R0]], $zero, $4
+; 32-CMOV-DAG:   movz $[[R1]], $zero, $4
+
+; 32R6-DAG:      lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-DAG:      lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-DAG:      or $[[CC:[0-9]+]], $4, $5
+; 32R6-DAG:      selnez $2, $[[R0]], $[[CC]]
+; 32R6-DAG:      selnez $3, $[[R1]], $[[CC]]
+
+; 64-CMOV:       ld $2, 0(${{[0-9]+}})
+; 64-CMOV:       movz $2, $zero, $4
+
+; 64R6:          ld $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 64R6:          selnez $2, $[[R0]], $4
+
+  %tobool = icmp ne i64 %s, 0
+  %0 = load i64* @g2, align 4
+  %cond = select i1 %tobool, i64 %0, i64 0
+  ret i64 %cond
+}
author	Dimitry Andric <dim@FreeBSD.org>	2014-11-24 09:08:18 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2014-11-24 09:08:18 +0000
commit	5ca98fd98791947eba83a1ed3f2c8191ef7afa6c (patch)
tree	f5944309621cee4fe0976be6f9ac619b7ebfc4c2 /test/CodeGen/Mips
parent	68bcb7db193e4bc81430063148253d30a791023e (diff)