vendor/clang/clang-trunk-r306325

author: Dimitry Andric <dim@FreeBSD.org> 2017-06-26 20:33:12 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-06-26 20:33:12 +0000
commit: ef915aab0ac566c55bfb0d7a9f6635bb5d94d4af (patch)
tree: ac935cfa19985d33098fc13e288b5ac830672dba /test
parent: 325377b57338e700317f5e423e5b0f1c08d99a39 (diff)
73 files changed, 3777 insertions, 707 deletions
diff --git a/test/Analysis/analyzer-config.c b/test/Analysis/analyzer-config.c
index c0153a50532a7..70521c63fbade 100644
--- a/test/Analysis/analyzer-config.c
+++ b/test/Analysis/analyzer-config.c
@@ -19,8 +19,8 @@ void foo() {
 // CHECK-NEXT: ipa = dynamic-bifurcate
 // CHECK-NEXT: ipa-always-inline-size = 3
 // CHECK-NEXT: leak-diagnostics-reference-allocation = false
-// CHECK-NEXT: max-inlinable-size = 50
-// CHECK-NEXT: max-nodes = 150000
+// CHECK-NEXT: max-inlinable-size = 100
+// CHECK-NEXT: max-nodes = 225000
 // CHECK-NEXT: max-times-inline-large = 32
 // CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
 // CHECK-NEXT: mode = deep
diff --git a/test/Analysis/analyzer-config.cpp b/test/Analysis/analyzer-config.cpp
index f84be17811608..60c03c1e497b2 100644
--- a/test/Analysis/analyzer-config.cpp
+++ b/test/Analysis/analyzer-config.cpp
@@ -30,8 +30,8 @@ public:
 // CHECK-NEXT: ipa = dynamic-bifurcate
 // CHECK-NEXT: ipa-always-inline-size = 3
 // CHECK-NEXT: leak-diagnostics-reference-allocation = false
-// CHECK-NEXT: max-inlinable-size = 50
-// CHECK-NEXT: max-nodes = 150000
+// CHECK-NEXT: max-inlinable-size = 100
+// CHECK-NEXT: max-nodes = 225000
 // CHECK-NEXT: max-times-inline-large = 32
 // CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
 // CHECK-NEXT: mode = deep
diff --git a/test/Analysis/builtin-assume.c b/test/Analysis/builtin-assume.c
deleted file mode 100644
index 00d651d9e3bec..0000000000000
--- a/test/Analysis/builtin-assume.c
+++ /dev/null
@@ -1,8 +0,0 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify %s
-
-void clang_analyzer_eval(int);
-
-void f(int i) {
-  __builtin_assume(i < 10);
-  clang_analyzer_eval(i < 15); // expected-warning {{TRUE}}
-}
diff --git a/test/Analysis/builtin-functions.cpp b/test/Analysis/builtin-functions.cpp
index 4e9859754d628..2c1950251145c 100644
--- a/test/Analysis/builtin-functions.cpp
+++ b/test/Analysis/builtin-functions.cpp
@@ -1,6 +1,7 @@
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -analyzer-checker=core,debug.ExprInspection %s -std=c++11 -verify
 
 void clang_analyzer_eval(bool);
+void clang_analyzer_warnIfReached();
 
 void testAddressof(int x) {
   clang_analyzer_eval(&x == __builtin_addressof(x)); // expected-warning{{TRUE}}
@@ -50,3 +51,16 @@ void test_assume_aligned_4(char *p) {
   q = (char*) __builtin_assume_aligned(p + 1, 16);
   clang_analyzer_eval(p == q); // expected-warning{{FALSE}}
 }
+
+void f(int i) {
+  __builtin_assume(i < 10);
+  clang_analyzer_eval(i < 15); // expected-warning {{TRUE}}
+}
+
+void g(int i) {
+  if (i > 5) {
+    __builtin_assume(i < 5);
+    clang_analyzer_warnIfReached(); // Assumtion contradicts constraints.
+                                    // We give up the analysis on this path.
+  }
+}
diff --git a/test/Analysis/copypaste/autogenerated_automoc.cpp b/test/Analysis/copypaste/autogenerated_automoc.cpp
new file mode 100644
index 0000000000000..55963c4545c9a
--- /dev/null
+++ b/test/Analysis/copypaste/autogenerated_automoc.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:IgnoredFilesPattern="moc_|.*_automoc.cpp" -verify %s
+
+// Because files that have `_automoc.' in their names are most likely autogenerated,
+// we suppress copy-paste warnings here.
+
+// expected-no-diagnostics
+
+void f1() {
+  int *p1 = new int[1];
+  int *p2 = new int[1];
+  if (p1) {
+    delete [] p1;
+    p1 = nullptr;
+  }
+  if (p2) {
+    delete [] p1; // no-warning
+    p2 = nullptr;
+  }
+}
diff --git a/test/Analysis/copypaste/dbus_autogenerated.cpp b/test/Analysis/copypaste/dbus_autogenerated.cpp
new file mode 100644
index 0000000000000..1824375658130
--- /dev/null
+++ b/test/Analysis/copypaste/dbus_autogenerated.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:IgnoredFilesPattern="moc_|dbus_|.*_automoc" -verify %s
+
+// Because files that have `dbus_' in their names are most likely autogenerated,
+// we suppress copy-paste warnings here.
+
+// expected-no-diagnostics
+
+void f1() {
+  int *p1 = new int[1];
+  int *p2 = new int[1];
+  if (p1) {
+    delete [] p1;
+    p1 = nullptr;
+  }
+  if (p2) {
+    delete [] p1; // no-warning
+    p2 = nullptr;
+  }
+}
diff --git a/test/Analysis/copypaste/moc_autogenerated.cpp b/test/Analysis/copypaste/moc_autogenerated.cpp
new file mode 100644
index 0000000000000..626fe2a3dd070
--- /dev/null
+++ b/test/Analysis/copypaste/moc_autogenerated.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:IgnoredFilesPattern="moc_|.*_automoc" -verify %s
+
+// Because files that have `moc_' in their names are most likely autogenerated,
+// we suppress copy-paste warnings here.
+
+// expected-no-diagnostics
+
+void f1() {
+  int *p1 = new int[1];
+  int *p2 = new int[1];
+  if (p1) {
+    delete [] p1;
+    p1 = nullptr;
+  }
+  if (p2) {
+    delete [] p1; // no-warning
+    p2 = nullptr;
+  }
+}
diff --git a/test/Analysis/copypaste/not-autogenerated.cpp b/test/Analysis/copypaste/not-autogenerated.cpp
new file mode 100644
index 0000000000000..765e7aaf2aab6
--- /dev/null
+++ b/test/Analysis/copypaste/not-autogenerated.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:IgnoredFilesPattern="moc_|ui_|dbus_|.*_automoc" -verify %s
+
+void f1() {
+  int *p1 = new int[1];
+  int *p2 = new int[1];
+  if (p1) {
+    delete [] p1; // expected-note{{Similar code using 'p1' here}}
+    p1 = nullptr;
+  }
+  if (p2) {
+    delete [] p1; // expected-warning{{Potential copy-paste error; did you really mean to use 'p1' here?}}
+    p2 = nullptr;
+  }
+}
diff --git a/test/Analysis/copypaste/ui_autogenerated.cpp b/test/Analysis/copypaste/ui_autogenerated.cpp
new file mode 100644
index 0000000000000..a08c33fe9e2a0
--- /dev/null
+++ b/test/Analysis/copypaste/ui_autogenerated.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:IgnoredFilesPattern="moc_|ui_|.*_automoc" -verify %s
+
+// Because files that have `ui_' in their names are most likely autogenerated,
+// we suppress copy-paste warnings here.
+
+// expected-no-diagnostics
+
+void f1() {
+  int *p1 = new int[1];
+  int *p2 = new int[1];
+  if (p1) {
+    delete [] p1;
+    p1 = nullptr;
+  }
+  if (p2) {
+    delete [] p1; // no-warning
+    p2 = nullptr;
+  }
+}
diff --git a/test/Analysis/null-deref-ps-region.c b/test/Analysis/null-deref-ps-region.c
index 6ef99ae473ca8..c46ca6c52ae34 100644
--- a/test/Analysis/null-deref-ps-region.c
+++ b/test/Analysis/null-deref-ps-region.c
@@ -1,6 +1,11 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core -std=gnu99 -analyzer-store=region -verify %s
-// expected-no-diagnostics
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core,unix,alpha.unix -std=gnu99 -analyzer-store=region -verify %s
 
+#include "Inputs/system-header-simulator.h"
+
+typedef __typeof(sizeof(int)) size_t;
+void *memset(void *__s, int __c, size_t __n);
+void *malloc(size_t __size);
+void free(void *__ptr);
 
 // The store for 'a[1]' should not be removed mistakenly. SymbolicRegions may
 // also be live roots.
@@ -13,3 +18,55 @@ void f14(int *a) {
     i = *p; // no-warning
   }
 }
+
+void foo() {
+  int *x = malloc(sizeof(int));
+  memset(x, 0, sizeof(int));
+  int n = 1 / *x; // FIXME: no-warning
+  free(x);
+}
+
+void bar() {
+  int *x = malloc(sizeof(int));
+  memset(x, 0, 1);
+  int n = 1 / *x; // no-warning
+  free(x);
+}
+
+void testConcreteNull() {
+  int *x = 0;
+  memset(x, 0, 1); // expected-warning {{Null pointer argument in call to memory set function}}
+}
+
+void testStackArray() {
+  char buf[13];
+  memset(buf, 0, 1); // no-warning
+}
+
+void testHeapSymbol() {
+  char *buf = (char *)malloc(13);
+  memset(buf, 0, 1); // no-warning
+  free(buf);
+}
+
+void testStackArrayOutOfBound() {
+  char buf[1];
+  memset(buf, 0, 1024); // expected-warning {{Memory set function accesses out-of-bound array element}}
+}
+
+void testHeapSymbolOutOfBound() {
+  char *buf = (char *)malloc(1);
+  memset(buf, 0, 1024); // expected-warning {{Memory set function accesses out-of-bound array element}}
+  free(buf);
+}
+
+void testStackArraySameSize() {
+  char buf[1];
+  memset(buf, 0, sizeof(buf)); // no-warning
+}
+
+void testHeapSymbolSameSize() {
+  char *buf = (char *)malloc(1);
+  memset(buf, 0, 1); // no-warning
+  free(buf);
+}
diff --git a/test/CXX/except/except.spec/p11.cpp b/test/CXX/except/except.spec/p11.cpp
index 1d0a647fb4f49..196f84c557ea5 100644
--- a/test/CXX/except/except.spec/p11.cpp
+++ b/test/CXX/except/except.spec/p11.cpp
@@ -1,12 +1,11 @@
 // RUN: %clang_cc1 -std=c++11 -fexceptions -fcxx-exceptions -fsyntax-only -verify %s
-// expected-no-diagnostics
 
 // This is the "let the user shoot themselves in the foot" clause.
-void f() noexcept {
-  throw 0; // no-error
+void f() noexcept { // expected-note {{non-throwing function declare here}}
+  throw 0; // expected-warning {{has a non-throwing exception specification but}} 
 }
-void g() throw() {
-  throw 0; // no-error
+void g() throw() { // expected-note {{non-throwing function declare here}}
+  throw 0; // expected-warning {{has a non-throwing exception specification but}} 
 }
 void h() throw(int) {
   throw 0.0; // no-error
diff --git a/test/CodeGen/64bit-swiftcall.c b/test/CodeGen/64bit-swiftcall.c
index 06c3145015529..92ba37cd7fe63 100644
--- a/test/CodeGen/64bit-swiftcall.c
+++ b/test/CodeGen/64bit-swiftcall.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -triple x86_64-apple-darwin10 -target-cpu core2 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -target-cpu core2 -emit-llvm -o - %s | FileCheck %s --check-prefix=X86-64
 // RUN: %clang_cc1 -triple arm64-apple-ios9 -target-cpu cyclone -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-apple-ios9 -target-cpu cyclone -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM64
 
@@ -1014,3 +1015,20 @@ typedef struct {
 TEST(struct_v1f3)
 // ARM64-LABEL: define swiftcc { <2 x float>, float } @return_struct_v1f3()
 // ARM64-LABEL: define swiftcc void @take_struct_v1f3(<2 x float>, float)
+
+typedef struct {
+  int3 vect;
+  unsigned long long val;
+} __attribute__((packed)) padded_alloc_size_vector;
+TEST(padded_alloc_size_vector)
+// X86-64-LABEL: take_padded_alloc_size_vector(<3 x i32>, i64)
+// X86-64-NOT: [4 x i8]
+// x86-64: ret void
+
+typedef union {
+  float f1;
+  float3 fv2;
+} union_hom_fp_partial2;
+TEST(union_hom_fp_partial2)
+// X86-64-LABEL: take_union_hom_fp_partial2(i64, float)
+// ARM64-LABEL: take_union_hom_fp_partial2(i64, float)
diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c
index bcb680c4b518e..cbc2e72fcbacb 100644
--- a/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-neon-intrinsics.c
@@ -9037,10 +9037,9 @@ int64x2_t test_vld1q_s64(int64_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
-// CHECK:   ret <8 x half> [[TMP3]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]]
+// CHECK:   ret <8 x half> [[TMP2]]
 float16x8_t test_vld1q_f16(float16_t const *a) {
   return vld1q_f16(a);
 }
@@ -9152,10 +9151,9 @@ int64x1_t test_vld1_s64(int64_t const *a) {
 
 // CHECK-LABEL: @test_vld1_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
-// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
-// CHECK:   ret <4 x half> [[TMP3]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]]
+// CHECK:   ret <4 x half> [[TMP2]]
 float16x4_t test_vld1_f16(float16_t const *a) {
   return vld1_f16(a);
 }
@@ -9342,10 +9340,10 @@ int64x2x2_t test_vld2q_s64(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
-// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
+// CHECK:   [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
@@ -9573,10 +9571,10 @@ int64x1x2_t test_vld2_s64(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
-// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
+// CHECK:   [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
@@ -9804,10 +9802,10 @@ int64x2x3_t test_vld3q_s64(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
-// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
+// CHECK:   [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
@@ -10035,10 +10033,10 @@ int64x1x3_t test_vld3_s64(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
-// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
+// CHECK:   [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
@@ -10266,10 +10264,10 @@ int64x2x4_t test_vld4q_s64(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
-// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
+// CHECK:   [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
@@ -10497,10 +10495,10 @@ int64x1x4_t test_vld4_s64(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
-// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
+// CHECK:   [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
@@ -10666,9 +10664,9 @@ void test_vst1q_s64(int64_t *a, int64x2_t b) {
 // CHECK-LABEL: @test_vst1q_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   store <8 x half> [[TMP3]], <8 x half>* [[TMP2]]
 // CHECK:   ret void
 void test_vst1q_f16(float16_t *a, float16x8_t b) {
   vst1q_f16(a, b);
@@ -10800,9 +10798,9 @@ void test_vst1_s64(int64_t *a, int64x1_t b) {
 // CHECK-LABEL: @test_vst1_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   store <4 x half> [[TMP3]], <4 x half>* [[TMP2]]
 // CHECK:   ret void
 void test_vst1_f16(float16_t *a, float16x4_t b) {
   vst1_f16(a, b);
@@ -11056,9 +11054,9 @@ void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st2.v8f16.p0i8(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
   vst2q_f16(a, b);
@@ -11366,9 +11364,9 @@ void test_vst2_s64(int64_t *a, int64x1x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st2.v4f16.p0i8(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst2_f16(float16_t *a, float16x4x2_t b) {
   vst2_f16(a, b);
@@ -11716,10 +11714,10 @@ void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st3.v8f16.p0i8(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
   vst3q_f16(a, b);
@@ -12085,10 +12083,10 @@ void test_vst3_s64(int64_t *a, int64x1x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st3.v4f16.p0i8(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst3_f16(float16_t *a, float16x4x3_t b) {
   vst3_f16(a, b);
@@ -12494,11 +12492,11 @@ void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st4.v8f16.p0i8(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
   vst4q_f16(a, b);
@@ -12922,11 +12920,11 @@ void test_vst4_s64(int64_t *a, int64x1x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st4.v4f16.p0i8(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst4_f16(float16_t *a, float16x4x4_t b) {
   vst4_f16(a, b);
@@ -13208,10 +13206,10 @@ int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x2.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half> } [[VLD1XN]], { <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
@@ -13454,10 +13452,10 @@ int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x2.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half> } [[VLD1XN]], { <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
@@ -13700,10 +13698,10 @@ int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x3.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD1XN]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
@@ -13946,10 +13944,10 @@ int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x3.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD1XN]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
@@ -14192,10 +14190,10 @@ int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x4.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD1XN]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
@@ -14438,10 +14436,10 @@ int64x1x4_t test_vld1_s64_x4(int64_t const *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x4.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD1XN]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
@@ -14752,10 +14750,10 @@ void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
+// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x2.v8f16.p0f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], half* [[TMP9]])
 // CHECK:   ret void
 void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) {
   vst1q_f16_x2(a, b);
@@ -15098,10 +15096,10 @@ void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
+// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x2.v4f16.p0f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], half* [[TMP9]])
 // CHECK:   ret void
 void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) {
   vst1_f16_x2(a, b);
@@ -15484,11 +15482,11 @@ void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x3.v8f16.p0f16(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], half* [[TMP12]])
 // CHECK:   ret void
 void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) {
   vst1q_f16_x3(a, b);
@@ -15894,11 +15892,11 @@ void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x3.v4f16.p0f16(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], half* [[TMP12]])
 // CHECK:   ret void
 void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) {
   vst1_f16_x3(a, b);
@@ -16344,12 +16342,12 @@ void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x4.v8f16.p0f16(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], half* [[TMP15]])
 // CHECK:   ret void
 void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) {
   vst1q_f16_x4(a, b);
@@ -16818,12 +16816,12 @@ void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x4.v4f16.p0f16(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], half* [[TMP15]])
 // CHECK:   ret void
 void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) {
   vst1_f16_x4(a, b);
diff --git a/test/CodeGen/aarch64-neon-ldst-one.c b/test/CodeGen/aarch64-neon-ldst-one.c
index 9bd9ab1cb61bf..a3c5b140a0d26 100644
--- a/test/CodeGen/aarch64-neon-ldst-one.c
+++ b/test/CodeGen/aarch64-neon-ldst-one.c
@@ -90,12 +90,11 @@ int64x2_t test_vld1q_dup_s64(int64_t  *a) {
 
 // CHECK-LABEL: define <8 x half> @test_vld1q_dup_f16(half* %a) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]]
-// CHECK:   [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
-// CHECK:   [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half>
-// CHECK:   ret <8 x half> [[TMP4]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]]
+// CHECK:   [[TMP3:%.*]] = insertelement <8 x half> undef, half [[TMP2]], i32 0
+// CHECK:   [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
+// CHECK:   ret <8 x half> [[LANE]]
 float16x8_t test_vld1q_dup_f16(float16_t  *a) {
   return vld1q_dup_f16(a);
 }
@@ -239,12 +238,11 @@ int64x1_t test_vld1_dup_s64(int64_t  *a) {
 
 // CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]]
-// CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
-// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half>
-// CHECK:   ret <4 x half> [[TMP4]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]]
+// CHECK:   [[TMP3:%.*]] = insertelement <4 x half> undef, half [[TMP2]], i32 0
+// CHECK:   [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK:   ret <4 x half> [[LANE]]
 float16x4_t test_vld1_dup_f16(float16_t  *a) {
   return vld1_dup_f16(a);
 }
@@ -447,10 +445,10 @@ int64x2x2_t test_vld2q_dup_s64(int64_t  *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
@@ -693,10 +691,10 @@ int64x1x2_t test_vld2_dup_s64(int64_t  *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
@@ -947,10 +945,10 @@ int64x2x3_t test_vld3q_dup_s64(int64_t  *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
@@ -1207,10 +1205,10 @@ int64x1x3_t test_vld3_dup_s64(int64_t  *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
@@ -1459,10 +1457,10 @@ int64x2x4_t test_vld4q_dup_s64(int64_t  *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
@@ -1705,10 +1703,10 @@ int64x1x4_t test_vld4_dup_s64(int64_t  *a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
@@ -1897,12 +1895,11 @@ int64x2_t test_vld1q_lane_s64(int64_t  *a, int64x2_t b) {
 // CHECK-LABEL: define <8 x half> @test_vld1q_lane_f16(half* %a, <8 x half> %b) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]]
-// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[VLD1_LANE]] to <8 x half>
-// CHECK:   ret <8 x half> [[TMP5]]
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]]
+// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
+// CHECK:   ret <8 x half> [[VLD1_LANE]]
 float16x8_t test_vld1q_lane_f16(float16_t  *a, float16x8_t b) {
   return vld1q_lane_f16(a, b, 7);
 }
@@ -2054,12 +2051,11 @@ int64x1_t test_vld1_lane_s64(int64_t  *a, int64x1_t b) {
 // CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]]
-// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
-// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[VLD1_LANE]] to <4 x half>
-// CHECK:   ret <4 x half> [[TMP5]]
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]]
+// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
+// CHECK:   ret <4 x half> [[VLD1_LANE]]
 float16x4_t test_vld1_lane_f16(float16_t  *a, float16x4_t b) {
   return vld1_lane_f16(a, b, 3);
 }
@@ -2495,11 +2491,11 @@ int64x2x2_t test_vld2q_lane_s64(int64_t  *a, int64x2x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]])
-// CHECK:   [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]]
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[VLD2_LANE:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2lane.v8f16.p0i8(<8 x half> [[TMP8]], <8 x half> [[TMP9]], i64 7, i8* [[TMP3]])
+// CHECK:   [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half> } [[VLD2_LANE]], { <8 x half>, <8 x half> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
@@ -2927,11 +2923,11 @@ int64x1x2_t test_vld2_lane_s64(int64_t  *a, int64x1x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]])
-// CHECK:   [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]]
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[VLD2_LANE:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2lane.v4f16.p0i8(<4 x half> [[TMP8]], <4 x half> [[TMP9]], i64 3, i8* [[TMP3]])
+// CHECK:   [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half> } [[VLD2_LANE]], { <4 x half>, <4 x half> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
@@ -3364,12 +3360,12 @@ int64x2x3_t test_vld3q_lane_s64(int64_t  *a, int64x2x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK:   [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]])
-// CHECK:   [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]]
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
+// CHECK:   [[VLD3_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3lane.v8f16.p0i8(<8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i64 7, i8* [[TMP3]])
+// CHECK:   [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3_LANE]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
@@ -3889,12 +3885,12 @@ int64x1x3_t test_vld3_lane_s64(int64_t  *a, int64x1x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK:   [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]])
-// CHECK:   [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]]
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
+// CHECK:   [[VLD3_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3lane.v4f16.p0i8(<4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i64 3, i8* [[TMP3]])
+// CHECK:   [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
@@ -4454,13 +4450,13 @@ int64x2x4_t test_vld4q_lane_s64(int64_t  *a, int64x2x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK:   [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]])
-// CHECK:   [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]]
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
+// CHECK:   [[VLD4_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4lane.v8f16.p0i8(<8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i64 7, i8* [[TMP3]])
+// CHECK:   [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
@@ -5043,13 +5039,13 @@ int64x1x4_t test_vld4_lane_s64(int64_t  *a, int64x1x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK:   [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]])
-// CHECK:   [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]]
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
+// CHECK:   [[VLD4_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4lane.v4f16.p0i8(<4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i64 3, i8* [[TMP3]])
+// CHECK:   [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
@@ -5361,10 +5357,10 @@ void test_vst1q_lane_s64(int64_t  *a, int64x2_t b) {
 // CHECK-LABEL: define void @test_vst1q_lane_f16(half* %a, <8 x half> %b) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   store i16 [[TMP3]], i16* [[TMP4]]
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
+// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   store half [[TMP3]], half* [[TMP4]]
 // CHECK:   ret void
 void test_vst1q_lane_f16(float16_t  *a, float16x8_t b) {
   vst1q_lane_f16(a, b, 7);
@@ -5517,10 +5513,10 @@ void test_vst1_lane_s64(int64_t  *a, int64x1_t b) {
 // CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   store i16 [[TMP3]], i16* [[TMP4]]
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
+// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   store half [[TMP3]], half* [[TMP4]]
 // CHECK:   ret void
 void test_vst1_lane_f16(float16_t  *a, float16x4_t b) {
   vst1_lane_f16(a, b, 3);
@@ -5789,9 +5785,9 @@ void test_vst2q_lane_s64(int64_t  *a, int64x2x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st2lane.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, i8* [[TMP2]])
+// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st2lane.v8f16.p0i8(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i64 7, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst2q_lane_f16(float16_t  *a, float16x8x2_t b) {
   vst2q_lane_f16(a, b, 7);
@@ -6124,9 +6120,9 @@ void test_vst2_lane_s64(int64_t  *a, int64x1x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st2lane.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, i8* [[TMP2]])
+// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st2lane.v4f16.p0i8(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i64 3, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst2_lane_f16(float16_t  *a, float16x4x2_t b) {
   vst2_lane_f16(a, b, 3);
@@ -6499,10 +6495,10 @@ void test_vst3q_lane_s64(int64_t  *a, int64x2x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]])
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st3lane.v8f16.p0i8(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i64 7, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst3q_lane_f16(float16_t  *a, float16x8x3_t b) {
   vst3q_lane_f16(a, b, 7);
@@ -6898,10 +6894,10 @@ void test_vst3_lane_s64(int64_t  *a, int64x1x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]])
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st3lane.v4f16.p0i8(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i64 3, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst3_lane_f16(float16_t  *a, float16x4x3_t b) {
   vst3_lane_f16(a, b, 3);
@@ -7337,11 +7333,11 @@ void test_vst4q_lane_s64(int64_t  *a, int64x2x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]])
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st4lane.v8f16.p0i8(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i64 7, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst4q_lane_f16(float16_t  *a, float16x8x4_t b) {
   vst4q_lane_f16(a, b, 7);
@@ -7800,11 +7796,11 @@ void test_vst4_lane_s64(int64_t  *a, int64x1x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]])
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st4lane.v4f16.p0i8(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i64 3, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst4_lane_f16(float16_t  *a, float16x4x4_t b) {
   vst4_lane_f16(a, b, 3);
diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
new file mode 100644
index 0000000000000..3f61238b64fb0
--- /dev/null
+++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -0,0 +1,1633 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
+// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -mem2reg \
+// RUN: | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: test_vabs_f16
+// CHECK:  [[ABS:%.*]] =  call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[ABS]]
+float16x4_t test_vabs_f16(float16x4_t a) {
+  return vabs_f16(a);
+}
+
+// CHECK-LABEL: test_vabsq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[ABS]]
+float16x8_t test_vabsq_f16(float16x8_t a) {
+  return vabsq_f16(a);
+}
+
+// CHECK-LABEL: test_vceqz_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vceqz_f16(float16x4_t a) {
+  return vceqz_f16(a);
+}
+
+// CHECK-LABEL: test_vceqzq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vceqzq_f16(float16x8_t a) {
+  return vceqzq_f16(a);
+}
+
+// CHECK-LABEL: test_vcgez_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcgez_f16(float16x4_t a) {
+  return vcgez_f16(a);
+}
+
+// CHECK-LABEL: test_vcgezq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcgezq_f16(float16x8_t a) {
+  return vcgezq_f16(a);
+}
+
+// CHECK-LABEL: test_vcgtz_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ogt <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcgtz_f16(float16x4_t a) {
+  return vcgtz_f16(a);
+}
+
+// CHECK-LABEL: test_vcgtzq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ogt <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcgtzq_f16(float16x8_t a) {
+  return vcgtzq_f16(a);
+}
+
+// CHECK-LABEL: test_vclez_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vclez_f16(float16x4_t a) {
+  return vclez_f16(a);
+}
+
+// CHECK-LABEL: test_vclezq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vclezq_f16(float16x8_t a) {
+  return vclezq_f16(a);
+}
+
+// CHECK-LABEL: test_vcltz_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcltz_f16(float16x4_t a) {
+  return vcltz_f16(a);
+}
+
+// CHECK-LABEL: test_vcltzq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcltzq_f16(float16x8_t a) {
+  return vcltzq_f16(a);
+}
+
+// CHECK-LABEL: test_vcvt_f16_s16
+// CHECK:  [[VCVT:%.*]] = sitofp <4 x i16> %a to <4 x half>
+// CHECK:  ret <4 x half> [[VCVT]]
+float16x4_t test_vcvt_f16_s16 (int16x4_t a) {
+  return vcvt_f16_s16(a);
+}
+
+// CHECK-LABEL: test_vcvtq_f16_s16
+// CHECK:  [[VCVT:%.*]] = sitofp <8 x i16> %a to <8 x half>
+// CHECK:  ret <8 x half> [[VCVT]]
+float16x8_t test_vcvtq_f16_s16 (int16x8_t a) {
+  return vcvtq_f16_s16(a);
+}
+
+// CHECK-LABEL: test_vcvt_f16_u16
+// CHECK:  [[VCVT:%.*]] = uitofp <4 x i16> %a to <4 x half>
+// CHECK:  ret <4 x half> [[VCVT]]
+float16x4_t test_vcvt_f16_u16 (uint16x4_t a) {
+  return vcvt_f16_u16(a);
+}
+
+// CHECK-LABEL: test_vcvtq_f16_u16
+// CHECK:  [[VCVT:%.*]] = uitofp <8 x i16> %a to <8 x half>
+// CHECK:  ret <8 x half> [[VCVT]]
+float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
+  return vcvtq_f16_u16(a);
+}
+
+// CHECK-LABEL: test_vcvt_s16_f16
+// CHECK:  [[VCVT:%.*]] = fptosi <4 x half> %a to <4 x i16>
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
+  return vcvt_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtq_s16_f16
+// CHECK:  [[VCVT:%.*]] = fptosi <8 x half> %a to <8 x i16>
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
+  return vcvtq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvt_u16_f16
+// CHECK:  [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16>
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvt_u16_f16 (float16x4_t a) {
+  return vcvt_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtq_u16_f16
+// CHECK:  [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16>
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
+  return vcvtq_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvta_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtas.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvta_s16_f16 (float16x4_t a) {
+  return vcvta_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtaq_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtaq_s16_f16 (float16x8_t a) {
+  return vcvtaq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtm_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtms.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvtm_s16_f16 (float16x4_t a) {
+  return vcvtm_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmq_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtms.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtmq_s16_f16 (float16x8_t a) {
+  return vcvtmq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtm_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtmu.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+uint16x4_t test_vcvtm_u16_f16 (float16x4_t a) {
+  return vcvtm_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmq_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtmu.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+uint16x8_t test_vcvtmq_u16_f16 (float16x8_t a) {
+  return vcvtmq_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtn_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtns.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvtn_s16_f16 (float16x4_t a) {
+  return vcvtn_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnq_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtns.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtnq_s16_f16 (float16x8_t a) {
+  return vcvtnq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtn_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtnu.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+uint16x4_t test_vcvtn_u16_f16 (float16x4_t a) {
+  return vcvtn_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnq_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtnu.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+uint16x8_t test_vcvtnq_u16_f16 (float16x8_t a) {
+  return vcvtnq_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtp_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtps.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvtp_s16_f16 (float16x4_t a) {
+  return vcvtp_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtpq_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtps.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtpq_s16_f16 (float16x8_t a) {
+  return vcvtpq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtp_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtpu.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+uint16x4_t test_vcvtp_u16_f16 (float16x4_t a) {
+  return vcvtp_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtpq_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtpu.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+uint16x8_t test_vcvtpq_u16_f16 (float16x8_t a) {
+  return vcvtpq_u16_f16(a);
+}
+
+// FIXME: Fix the zero constant when fp16 non-storage-only type becomes available.
+// CHECK-LABEL: test_vneg_f16
+// CHECK:  [[NEG:%.*]] = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
+// CHECK:  ret <4 x half> [[NEG]]
+float16x4_t test_vneg_f16(float16x4_t a) {
+  return vneg_f16(a);
+}
+
+// CHECK-LABEL: test_vnegq_f16
+// CHECK:  [[NEG:%.*]] = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
+// CHECK:  ret <8 x half> [[NEG]]
+float16x8_t test_vnegq_f16(float16x8_t a) {
+  return vnegq_f16(a);
+}
+
+// CHECK-LABEL: test_vrecpe_f16
+// CHECK:  [[RCP:%.*]] = call <4 x half> @llvm.aarch64.neon.frecpe.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RCP]]
+float16x4_t test_vrecpe_f16(float16x4_t a) {
+  return vrecpe_f16(a);
+}
+
+// CHECK-LABEL: test_vrecpeq_f16
+// CHECK:  [[RCP:%.*]] = call <8 x half> @llvm.aarch64.neon.frecpe.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RCP]]
+float16x8_t test_vrecpeq_f16(float16x8_t a) {
+  return vrecpeq_f16(a);
+}
+
+// CHECK-LABEL: test_vrnd_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.trunc.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrnd_f16(float16x4_t a) {
+  return vrnd_f16(a);
+}
+
+// CHECK-LABEL: test_vrndq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndq_f16(float16x8_t a) {
+  return vrndq_f16(a);
+}
+
+// CHECK-LABEL: test_vrnda_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.round.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrnda_f16(float16x4_t a) {
+  return vrnda_f16(a);
+}
+
+// CHECK-LABEL: test_vrndaq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.round.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndaq_f16(float16x8_t a) {
+  return vrndaq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndi_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndi_f16(float16x4_t a) {
+  return vrndi_f16(a);
+}
+
+// CHECK-LABEL: test_vrndiq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndiq_f16(float16x8_t a) {
+  return vrndiq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndm_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.floor.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndm_f16(float16x4_t a) {
+  return vrndm_f16(a);
+}
+
+// CHECK-LABEL: test_vrndmq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndmq_f16(float16x8_t a) {
+  return vrndmq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndn_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndn_f16(float16x4_t a) {
+  return vrndn_f16(a);
+}
+
+// CHECK-LABEL: test_vrndnq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.aarch64.neon.frintn.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndnq_f16(float16x8_t a) {
+  return vrndnq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndp_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.ceil.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndp_f16(float16x4_t a) {
+  return vrndp_f16(a);
+}
+
+// CHECK-LABEL: test_vrndpq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndpq_f16(float16x8_t a) {
+  return vrndpq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndx_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.rint.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndx_f16(float16x4_t a) {
+  return vrndx_f16(a);
+}
+
+// CHECK-LABEL: test_vrndxq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndxq_f16(float16x8_t a) {
+  return vrndxq_f16(a);
+}
+
+// CHECK-LABEL: test_vrsqrte_f16
+// CHECK:  [[RND:%.*]] = call <4 x half> @llvm.aarch64.neon.frsqrte.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrsqrte_f16(float16x4_t a) {
+  return vrsqrte_f16(a);
+}
+
+// CHECK-LABEL: test_vrsqrteq_f16
+// CHECK:  [[RND:%.*]] = call <8 x half> @llvm.aarch64.neon.frsqrte.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrsqrteq_f16(float16x8_t a) {
+  return vrsqrteq_f16(a);
+}
+
+// CHECK-LABEL: test_vsqrt_f16
+// CHECK:  [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[SQR]]
+float16x4_t test_vsqrt_f16(float16x4_t a) {
+  return vsqrt_f16(a);
+}
+
+// CHECK-LABEL: test_vsqrtq_f16
+// CHECK:  [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[SQR]]
+float16x8_t test_vsqrtq_f16(float16x8_t a) {
+  return vsqrtq_f16(a);
+}
+
+// CHECK-LABEL: test_vadd_f16
+// CHECK:  [[ADD:%.*]] = fadd <4 x half> %a, %b
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vadd_f16(float16x4_t a, float16x4_t b) {
+  return vadd_f16(a, b);
+}
+
+// CHECK-LABEL: test_vaddq_f16
+// CHECK:  [[ADD:%.*]] = fadd <8 x half> %a, %b
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b) {
+  return vaddq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vabd_f16
+// CHECK:  [[ABD:%.*]] = call <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[ABD]]
+float16x4_t test_vabd_f16(float16x4_t a, float16x4_t b) {
+  return vabd_f16(a, b);
+}
+
+// CHECK-LABEL: test_vabdq_f16
+// CHECK:  [[ABD:%.*]] = call <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[ABD]]
+float16x8_t test_vabdq_f16(float16x8_t a, float16x8_t b) {
+  return vabdq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcage_f16
+// CHECK:  [[ABS:%.*]] = call <4 x i16> @llvm.aarch64.neon.facge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x i16> [[ABS]]
+uint16x4_t test_vcage_f16(float16x4_t a, float16x4_t b) {
+  return vcage_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcageq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x i16> @llvm.aarch64.neon.facge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x i16> [[ABS]]
+uint16x8_t test_vcageq_f16(float16x8_t a, float16x8_t b) {
+  return vcageq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcagt_f16
+// CHECK:  [[ABS:%.*]] = call <4 x i16> @llvm.aarch64.neon.facgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x i16> [[ABS]]
+uint16x4_t test_vcagt_f16(float16x4_t a, float16x4_t b) {
+  return vcagt_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcagtq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x i16> @llvm.aarch64.neon.facgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x i16> [[ABS]]
+uint16x8_t test_vcagtq_f16(float16x8_t a, float16x8_t b) {
+  return vcagtq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcale_f16
+// CHECK:  [[ABS:%.*]] = call <4 x i16> @llvm.aarch64.neon.facge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
+// CHECK:  ret <4 x i16> [[ABS]]
+uint16x4_t test_vcale_f16(float16x4_t a, float16x4_t b) {
+  return vcale_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcaleq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x i16> @llvm.aarch64.neon.facge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
+// CHECK:  ret <8 x i16> [[ABS]]
+uint16x8_t test_vcaleq_f16(float16x8_t a, float16x8_t b) {
+  return vcaleq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcalt_f16
+// CHECK:  [[ABS:%.*]] = call <4 x i16> @llvm.aarch64.neon.facgt.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
+// CHECK:  ret <4 x i16> [[ABS]]
+uint16x4_t test_vcalt_f16(float16x4_t a, float16x4_t b) {
+  return vcalt_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcaltq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x i16> @llvm.aarch64.neon.facgt.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
+// CHECK:  ret <8 x i16> [[ABS]]
+uint16x8_t test_vcaltq_f16(float16x8_t a, float16x8_t b) {
+  return vcaltq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vceq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vceq_f16(float16x4_t a, float16x4_t b) {
+  return vceq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vceqq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vceqq_f16(float16x8_t a, float16x8_t b) {
+  return vceqq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcge_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcge_f16(float16x4_t a, float16x4_t b) {
+  return vcge_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcgeq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcgeq_f16(float16x8_t a, float16x8_t b) {
+  return vcgeq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcgt_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ogt <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcgt_f16(float16x4_t a, float16x4_t b) {
+  return vcgt_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcgtq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ogt <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcgtq_f16(float16x8_t a, float16x8_t b) {
+  return vcgtq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcle_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcle_f16(float16x4_t a, float16x4_t b) {
+  return vcle_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcleq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcleq_f16(float16x8_t a, float16x8_t b) {
+  return vcleq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vclt_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vclt_f16(float16x4_t a, float16x4_t b) {
+  return vclt_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcltq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcltq_f16(float16x8_t a, float16x8_t b) {
+  return vcltq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcvt_n_f16_s16
+// CHECK:  [[CVT:%.*]] = call <4 x half> @llvm.aarch64.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
+// CHECK:  ret <4 x half> [[CVT]]
+float16x4_t test_vcvt_n_f16_s16(int16x4_t a) {
+  return vcvt_n_f16_s16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvtq_n_f16_s16
+// CHECK:  [[CVT:%.*]] = call <8 x half> @llvm.aarch64.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
+// CHECK:  ret <8 x half> [[CVT]]
+float16x8_t test_vcvtq_n_f16_s16(int16x8_t a) {
+  return vcvtq_n_f16_s16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvt_n_f16_u16
+// CHECK:  [[CVT:%.*]] = call <4 x half> @llvm.aarch64.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
+// CHECK:  ret <4 x half> [[CVT]]
+float16x4_t test_vcvt_n_f16_u16(uint16x4_t a) {
+  return vcvt_n_f16_u16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvtq_n_f16_u16
+// CHECK:  [[CVT:%.*]] = call <8 x half> @llvm.aarch64.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
+// CHECK:  ret <8 x half> [[CVT]]
+float16x8_t test_vcvtq_n_f16_u16(uint16x8_t a) {
+  return vcvtq_n_f16_u16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvt_n_s16_f16
+// CHECK:  [[CVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
+// CHECK:  ret <4 x i16> [[CVT]]
+int16x4_t test_vcvt_n_s16_f16(float16x4_t a) {
+  return vcvt_n_s16_f16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvtq_n_s16_f16
+// CHECK:  [[CVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
+// CHECK:  ret <8 x i16> [[CVT]]
+int16x8_t test_vcvtq_n_s16_f16(float16x8_t a) {
+  return vcvtq_n_s16_f16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvt_n_u16_f16
+// CHECK:  [[CVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
+// CHECK:  ret <4 x i16> [[CVT]]
+uint16x4_t test_vcvt_n_u16_f16(float16x4_t a) {
+  return vcvt_n_u16_f16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvtq_n_u16_f16
+// CHECK:  [[CVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
+// CHECK:  ret <8 x i16> [[CVT]]
+uint16x8_t test_vcvtq_n_u16_f16(float16x8_t a) {
+  return vcvtq_n_u16_f16(a, 2);
+}
+
+// CHECK-LABEL: test_vdiv_f16
+// CHECK:  [[DIV:%.*]] = fdiv <4 x half> %a, %b
+// CHECK:  ret <4 x half> [[DIV]]
+float16x4_t test_vdiv_f16(float16x4_t a, float16x4_t b) {
+  return vdiv_f16(a, b);
+}
+
+// CHECK-LABEL: test_vdivq_f16
+// CHECK:  [[DIV:%.*]] = fdiv <8 x half> %a, %b
+// CHECK:  ret <8 x half> [[DIV]]
+float16x8_t test_vdivq_f16(float16x8_t a, float16x8_t b) {
+  return vdivq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmax_f16
+// CHECK:  [[MAX:%.*]] = call <4 x half> @llvm.aarch64.neon.fmax.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MAX]]
+float16x4_t test_vmax_f16(float16x4_t a, float16x4_t b) {
+  return vmax_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxq_f16
+// CHECK:  [[MAX:%.*]] = call <8 x half> @llvm.aarch64.neon.fmax.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MAX]]
+float16x8_t test_vmaxq_f16(float16x8_t a, float16x8_t b) {
+  return vmaxq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxnm_f16
+// CHECK:  [[MAX:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MAX]]
+float16x4_t test_vmaxnm_f16(float16x4_t a, float16x4_t b) {
+  return vmaxnm_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxnmq_f16
+// CHECK:  [[MAX:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MAX]]
+float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) {
+  return vmaxnmq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmin_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.fmin.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vmin_f16(float16x4_t a, float16x4_t b) {
+  return vmin_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminq_f16
+// CHECK:  [[MIN:%.*]] = call <8 x half> @llvm.aarch64.neon.fmin.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vminq_f16(float16x8_t a, float16x8_t b) {
+  return vminq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminnm_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vminnm_f16(float16x4_t a, float16x4_t b) {
+  return vminnm_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminnmq_f16
+// CHECK:  [[MIN:%.*]] = call <8 x half> @llvm.aarch64.neon.fminnm.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) {
+  return vminnmq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmul_f16
+// CHECK:  [[MUL:%.*]] = fmul <4 x half> %a, %b
+// CHECK:  ret <4 x half> [[MUL]]
+float16x4_t test_vmul_f16(float16x4_t a, float16x4_t b) {
+  return vmul_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulq_f16
+// CHECK:  [[MUL:%.*]] = fmul <8 x half> %a, %b
+// CHECK:  ret <8 x half> [[MUL]]
+float16x8_t test_vmulq_f16(float16x8_t a, float16x8_t b) {
+  return vmulq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulx_f16
+// CHECK:  [[MUL:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MUL]]
+float16x4_t test_vmulx_f16(float16x4_t a, float16x4_t b) {
+  return vmulx_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulxq_f16
+// CHECK:  [[MUL:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MUL]]
+float16x8_t test_vmulxq_f16(float16x8_t a, float16x8_t b) {
+  return vmulxq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpadd_f16
+// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.addp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
+  return vpadd_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpaddq_f16
+// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.addp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) {
+  return vpaddq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmax_f16
+// CHECK:  [[MAX:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MAX]]
+float16x4_t test_vpmax_f16(float16x4_t a, float16x4_t b) {
+  return vpmax_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmaxq_f16
+// CHECK:  [[MAX:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MAX]]
+float16x8_t test_vpmaxq_f16(float16x8_t a, float16x8_t b) {
+  return vpmaxq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmaxnm_f16
+// CHECK:  [[MAX:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MAX]]
+float16x4_t test_vpmaxnm_f16(float16x4_t a, float16x4_t b) {
+  return vpmaxnm_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmaxnmq_f16
+// CHECK:  [[MAX:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MAX]]
+float16x8_t test_vpmaxnmq_f16(float16x8_t a, float16x8_t b) {
+  return vpmaxnmq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmin_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.fminp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vpmin_f16(float16x4_t a, float16x4_t b) {
+  return vpmin_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpminq_f16
+// CHECK:  [[MIN:%.*]] = call <8 x half> @llvm.aarch64.neon.fminp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vpminq_f16(float16x8_t a, float16x8_t b) {
+  return vpminq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpminnm_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vpminnm_f16(float16x4_t a, float16x4_t b) {
+  return vpminnm_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpminnmq_f16
+// CHECK:  [[MIN:%.*]] = call <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vpminnmq_f16(float16x8_t a, float16x8_t b) {
+  return vpminnmq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrecps_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.frecps.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vrecps_f16(float16x4_t a, float16x4_t b) {
+  return vrecps_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrecpsq_f16
+// CHECK:  [[MIN:%.*]] =  call <8 x half> @llvm.aarch64.neon.frecps.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vrecpsq_f16(float16x8_t a, float16x8_t b) {
+  return vrecpsq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrsqrts_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.frsqrts.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vrsqrts_f16(float16x4_t a, float16x4_t b) {
+  return vrsqrts_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrsqrtsq_f16
+// CHECK:  [[MIN:%.*]] =  call <8 x half> @llvm.aarch64.neon.frsqrts.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vrsqrtsq_f16(float16x8_t a, float16x8_t b) {
+  return vrsqrtsq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vsub_f16
+// CHECK:  [[ADD:%.*]] = fsub <4 x half> %a, %b 
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vsub_f16(float16x4_t a, float16x4_t b) {
+  return vsub_f16(a, b);
+}
+
+// CHECK-LABEL: test_vsubq_f16
+// CHECK:  [[ADD:%.*]] = fsub <8 x half> %a, %b
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b) {
+  return vsubq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vfma_f16
+// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
+  return vfma_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmaq_f16
+// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
+  return vfmaq_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfms_f16
+// CHECK:  [[SUB:%.*]] = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
+  return vfms_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsq_f16
+// CHECK:  [[SUB:%.*]] = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
+  return vfmsq_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfma_lane_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
+// CHECK: ret <4 x half> [[FMLA]]
+float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
+  return vfma_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfmaq_lane_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
+// CHECK: ret <8 x half> [[FMLA]]
+float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
+  return vfmaq_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfma_laneq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
+// CHECK: ret <4 x half> [[FMLA]]
+float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
+  return vfma_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfmaq_laneq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
+// CHECK: ret <8 x half> [[FMLA]]
+float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
+  return vfmaq_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfma_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
+// CHECK: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a)
+// CHECK: ret <4 x half> [[FMA]]
+float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
+  return vfma_n_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmaq_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
+// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
+// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
+// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
+// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
+// CHECK: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a)
+// CHECK: ret <8 x half> [[FMA]]
+float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
+  return vfmaq_n_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmah_lane_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
+// CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
+// CHECK: ret half [[FMA]]
+float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
+  return vfmah_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfmah_laneq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
+// CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
+// CHECK: ret half [[FMA]]
+float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
+  return vfmah_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfms_lane_f16
+// CHECK: [[SUB:%.*]]  = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
+// CHECK: ret <4 x half> [[FMA]]
+float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
+  return vfms_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfmsq_lane_f16
+// CHECK: [[SUB:%.*]]  = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
+// CHECK: ret <8 x half> [[FMLA]]
+float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
+  return vfmsq_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfms_laneq_f16
+// CHECK: [[SUB:%.*]]  = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
+// CHECK: ret <4 x half> [[FMLA]]
+float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
+  return vfms_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfmsq_laneq_f16
+// CHECK: [[SUB:%.*]]  = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
+// CHECK: ret <8 x half> [[FMLA]]
+float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
+  return vfmsq_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfms_n_f16
+// CHECK: [[SUB:%.*]]  = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
+// CHECK: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a)
+// CHECK: ret <4 x half> [[FMA]]
+float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
+  return vfms_n_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsq_n_f16
+// CHECK: [[SUB:%.*]]  = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
+// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
+// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
+// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
+// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
+// CHECK: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a)
+// CHECK: ret <8 x half> [[FMA]]
+float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
+  return vfmsq_n_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsh_lane_f16
+// CHECK: [[TMP0:%.*]] = fpext half %b to float
+// CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
+// CHECK: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP3]], i32 3
+// CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
+// CHECK: ret half [[FMA]]
+float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
+  return vfmsh_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfmsh_laneq_f16
+// CHECK: [[TMP0:%.*]] = fpext half %b to float
+// CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
+// CHECK: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP3]], i32 7
+// CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
+// CHECK: ret half [[FMA]]
+float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
+  return vfmsh_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vmul_lane_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]]  = fmul <4 x half> %a, [[TMP0]]
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
+  return vmul_lane_f16(a, b, 3);
+}
+
+// CHECK-LABEL: test_vmulq_lane_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = fmul <8 x half> %a, [[TMP0]]
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
+  return vmulq_lane_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmul_laneq_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = fmul <4 x half> %a, [[TMP0]]
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmul_laneq_f16(float16x4_t a, float16x8_t b) {
+  return vmul_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulq_laneq_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = fmul <8 x half> %a, [[TMP0]]
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulq_laneq_f16(float16x8_t a, float16x8_t b) {
+  return vmulq_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmul_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half %b, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %b, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %b, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %b, i32 3
+// CHECK: [[MUL:%.*]]  = fmul <4 x half> %a, [[TMP3]]
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) {
+  return vmul_n_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulq_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half %b, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %b, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %b, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %b, i32 3
+// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %b, i32 4
+// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %b, i32 5
+// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %b, i32 6
+// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %b, i32 7
+// CHECK: [[MUL:%.*]]  = fmul <8 x half> %a, [[TMP7]]
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) {
+  return vmulq_n_f16(a, b);
+}
+
+// FIXME: Fix it when fp16 non-storage-only type becomes available.
+// CHECK-LABEL: test_vmulh_lane_f16
+// CHECK: [[CONV0:%.*]] = fpext half %a to float
+// CHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CHECK: ret half [[CONV3:%.*]]
+float16_t test_vmulh_lane_f16(float16_t a, float16x4_t b) {
+  return vmulh_lane_f16(a, b, 3);
+}
+
+// CHECK-LABEL: test_vmulh_laneq_f16
+// CHECK: [[CONV0:%.*]] = fpext half %a to float
+// CHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CHECK: ret half [[CONV3:%.*]]
+float16_t test_vmulh_laneq_f16(float16_t a, float16x8_t b) {
+  return vmulh_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulx_lane_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> [[TMP0]])
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmulx_lane_f16(float16x4_t a, float16x4_t b) {
+  return vmulx_lane_f16(a, b, 3);
+}
+
+// CHECK-LABEL: test_vmulxq_lane_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> [[TMP0]])
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulxq_lane_f16(float16x8_t a, float16x4_t b) {
+  return vmulxq_lane_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulx_laneq_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> [[TMP0]])
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmulx_laneq_f16(float16x4_t a, float16x8_t b) {
+  return vmulx_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulxq_laneq_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> [[TMP0]])
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulxq_laneq_f16(float16x8_t a, float16x8_t b) {
+  return vmulxq_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulx_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half %b, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %b, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %b, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %b, i32 3
+// CHECK: [[MUL:%.*]]  = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> [[TMP3]])
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmulx_n_f16(float16x4_t a, float16_t b) {
+  return vmulx_n_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulxq_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half %b, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %b, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %b, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %b, i32 3
+// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %b, i32 4
+// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %b, i32 5
+// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %b, i32 6
+// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %b, i32 7
+// CHECK: [[MUL:%.*]]  = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> [[TMP7]])
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulxq_n_f16(float16x8_t a, float16_t b) {
+  return vmulxq_n_f16(a, b);
+}
+
+/* TODO: Not implemented yet (needs scalar intrinsic from arm_fp16.h)
+// CCHECK-LABEL: test_vmulxh_lane_f16
+// CCHECK: [[CONV0:%.*]] = fpext half %a to float
+// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CCHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CCHECK: ret half [[CONV3:%.*]]
+float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) {
+  return vmulxh_lane_f16(a, b, 3);
+}
+
+// CCHECK-LABEL: test_vmulxh_laneq_f16
+// CCHECK: [[CONV0:%.*]] = fpext half %a to float
+// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CCHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CCHECK: ret half [[CONV3:%.*]]
+float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
+  return vmulxh_laneq_f16(a, b, 7);
+}
+*/
+
+// CHECK-LABEL: test_vmaxv_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fmaxv.f16.v4f16(<4 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vmaxv_f16(float16x4_t a) {
+  return vmaxv_f16(a);
+}
+
+// CHECK-LABEL: test_vmaxvq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fmaxv.f16.v8f16(<8 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vmaxvq_f16(float16x8_t a) {
+  return vmaxvq_f16(a);
+}
+
+// CHECK-LABEL: test_vminv_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fminv.f16.v4f16(<4 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vminv_f16(float16x4_t a) {
+  return vminv_f16(a);
+}
+
+// CHECK-LABEL: test_vminvq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fminv.f16.v8f16(<8 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vminvq_f16(float16x8_t a) {
+  return vminvq_f16(a);
+}
+
+// CHECK-LABEL: test_vmaxnmv_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fmaxnmv.f16.v4f16(<4 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vmaxnmv_f16(float16x4_t a) {
+  return vmaxnmv_f16(a);
+}
+
+// CHECK-LABEL: test_vmaxnmvq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fmaxnmv.f16.v8f16(<8 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vmaxnmvq_f16(float16x8_t a) {
+  return vmaxnmvq_f16(a);
+}
+
+// CHECK-LABEL: test_vminnmv_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fminnmv.f16.v4f16(<4 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vminnmv_f16(float16x4_t a) {
+  return vminnmv_f16(a);
+}
+
+// CHECK-LABEL: test_vminnmvq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fminnmv.f16.v8f16(<8 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vminnmvq_f16(float16x8_t a) {
+  return vminnmvq_f16(a);
+}
+
+// CHECK-LABEL: test_vbsl_f16
+// CHECK:  [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK:  [[TMP1:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK:  [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK:  [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK:  [[TMP4:%.*]] = and <4 x i16> %a, [[TMP2]]
+// CHECK:  [[TMP5:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:  [[TMP6:%.*]] = and <4 x i16> [[TMP5]], [[TMP3]]
+// CHECK:  [[TMP7:%.*]] = or <4 x i16> [[TMP4]], [[TMP6]]
+// CHECK:  [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <4 x half>
+// CHECK:  ret <4 x half> [[TMP8]]
+float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
+  return vbsl_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vbslq_f16
+// CHECK:  [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK:  [[TMP1:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK:  [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK:  [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK:  [[TMP4:%.*]] = and <8 x i16> %a, [[TMP2]]
+// CHECK:  [[TMP5:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:  [[TMP6:%.*]] = and <8 x i16> [[TMP5]], [[TMP3]]
+// CHECK:  [[TMP7:%.*]] = or <8 x i16> [[TMP4]], [[TMP6]]
+// CHECK:  [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <8 x half>
+// CHECK:  ret <8 x half> [[TMP8]]
+float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
+  return vbslq_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vzip_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false)
+float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
+  return vzip_f16(a, b);
+}
+
+// CHECK-LABEL: test_vzipq_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK:   store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK:   store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false)
+float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
+  return vzipq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vuzp_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false)
+float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
+  return vuzp_f16(a, b);
+}
+
+// CHECK-LABEL: test_vuzpq_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK:   store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK:   store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false)
+float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
+  return vuzpq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vtrn_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false)
+float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
+  return vtrn_f16(a, b);
+}
+
+// CHECK-LABEL: test_vtrnq_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK:   store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]] 
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32>  <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK:   store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false)
+float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
+  return vtrnq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmov_n_f16
+// CHECK:   [[TMP0:%.*]] = insertelement <4 x half> undef, half %a, i32 0
+// CHECK:   [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %a, i32 1
+// CHECK:   [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %a, i32 2
+// CHECK:   [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %a, i32 3
+// CHECK:   ret <4 x half> [[TMP3]]
+float16x4_t test_vmov_n_f16(float16_t a) {
+  return vmov_n_f16(a);
+}
+
+// CHECK-LABEL: test_vmovq_n_f16
+// CHECK:   [[TMP0:%.*]] = insertelement <8 x half> undef, half %a, i32 0
+// CHECK:   [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %a, i32 1
+// CHECK:   [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %a, i32 2
+// CHECK:   [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %a, i32 3
+// CHECK:   [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %a, i32 4
+// CHECK:   [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %a, i32 5
+// CHECK:   [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %a, i32 6
+// CHECK:   [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %a, i32 7
+// CHECK:   ret <8 x half> [[TMP7]]
+float16x8_t test_vmovq_n_f16(float16_t a) {
+  return vmovq_n_f16(a);
+}
+
+// CHECK-LABEL: test_vdup_n_f16
+// CHECK:   [[TMP0:%.*]] = insertelement <4 x half> undef, half %a, i32 0
+// CHECK:   [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %a, i32 1
+// CHECK:   [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %a, i32 2
+// CHECK:   [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %a, i32 3
+// CHECK:   ret <4 x half> [[TMP3]]
+float16x4_t test_vdup_n_f16(float16_t a) {
+  return vdup_n_f16(a);
+}
+
+// CHECK-LABEL: test_vdupq_n_f16
+// CHECK:   [[TMP0:%.*]] = insertelement <8 x half> undef, half %a, i32 0
+// CHECK:   [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %a, i32 1
+// CHECK:   [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %a, i32 2
+// CHECK:   [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %a, i32 3
+// CHECK:   [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %a, i32 4
+// CHECK:   [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %a, i32 5
+// CHECK:   [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %a, i32 6
+// CHECK:   [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %a, i32 7
+// CHECK:   ret <8 x half> [[TMP7]]
+float16x8_t test_vdupq_n_f16(float16_t a) {
+  return vdupq_n_f16(a);
+}
+
+// CHECK-LABEL: test_vdup_lane_f16
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK:   ret <4 x half> [[SHFL]]
+float16x4_t test_vdup_lane_f16(float16x4_t a) {
+  return vdup_lane_f16(a, 3);
+}
+
+// CHECK-LABEL: test_vdupq_lane_f16
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK:   ret <8 x half> [[SHFL]]
+float16x8_t test_vdupq_lane_f16(float16x4_t a) {
+  return vdupq_lane_f16(a, 7);
+}
+
+// CHECK-LABEL: @test_vext_f16(
+// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+// CHECK:   ret <4 x half> [[VEXT]]
+float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
+  return vext_f16(a, b, 2);
+}
+
+// CHECK-LABEL: @test_vextq_f16(
+// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+// CHECK:   ret <8 x half> [[VEXT]]
+float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
+  return vextq_f16(a, b, 5);
+}
+
+// CHECK-LABEL: @test_vrev64_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK:   ret <4 x half> [[SHFL]]
+float16x4_t test_vrev64_f16(float16x4_t a) {
+  return vrev64_f16(a);
+}
+
+// CHECK-LABEL: @test_vrev64q_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK:   ret <8 x half> [[SHFL]]
+float16x8_t test_vrev64q_f16(float16x8_t a) {
+  return vrev64q_f16(a);
+}
+
+// CHECK-LABEL: @test_vzip1_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   ret <4 x half> [[SHFL]]
+float16x4_t test_vzip1_f16(float16x4_t a, float16x4_t b) {
+  return vzip1_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vzip1q_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK:   ret <8 x half> [[SHFL]]
+float16x8_t test_vzip1q_f16(float16x8_t a, float16x8_t b) {
+  return vzip1q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vzip2_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   ret <4 x half> [[SHFL]]
+float16x4_t test_vzip2_f16(float16x4_t a, float16x4_t b) {
+  return vzip2_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vzip2q_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK:   ret <8 x half> [[SHFL]]
+float16x8_t test_vzip2q_f16(float16x8_t a, float16x8_t b) {
+  return vzip2q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vuzp1_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   ret <4 x half> [[SHUFFLE_I]]
+float16x4_t test_vuzp1_f16(float16x4_t a, float16x4_t b) {
+  return vuzp1_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vuzp1q_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK:   ret <8 x half> [[SHUFFLE_I]]
+float16x8_t test_vuzp1q_f16(float16x8_t a, float16x8_t b) {
+  return vuzp1q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vuzp2_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   ret <4 x half> [[SHUFFLE_I]]
+float16x4_t test_vuzp2_f16(float16x4_t a, float16x4_t b) {
+  return vuzp2_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vuzp2q_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK:   ret <8 x half> [[SHUFFLE_I]]
+float16x8_t test_vuzp2q_f16(float16x8_t a, float16x8_t b) {
+  return vuzp2q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vtrn1_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   ret <4 x half> [[SHUFFLE_I]]
+float16x4_t test_vtrn1_f16(float16x4_t a, float16x4_t b) {
+  return vtrn1_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vtrn1q_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32>  <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK:   ret <8 x half> [[SHUFFLE_I]]
+float16x8_t test_vtrn1q_f16(float16x8_t a, float16x8_t b) {
+  return vtrn1q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vtrn2_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   ret <4 x half> [[SHUFFLE_I]]
+float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) {
+  return vtrn2_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vtrn2q_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK:   ret <8 x half> [[SHUFFLE_I]]
+float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) {
+  return vtrn2q_f16(a, b);
+}
+
diff --git a/test/CodeGen/address-space.c b/test/CodeGen/address-space.c
index 35e3dbdcfa737..54e059385772b 100644
--- a/test/CodeGen/address-space.c
+++ b/test/CodeGen/address-space.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,X86,GIZ %s
 // RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=CHECK,PIZ %s
-// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ %s
+// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,AMDGIZ,GIZ %s
 
 // CHECK: @foo = common addrspace(1) global
 int foo __attribute__((address_space(1)));
diff --git a/test/CodeGen/arm_neon_intrinsics.c b/test/CodeGen/arm_neon_intrinsics.c
index 62888dd73339a..b01c90c03a96d 100644
--- a/test/CodeGen/arm_neon_intrinsics.c
+++ b/test/CodeGen/arm_neon_intrinsics.c
@@ -3896,9 +3896,8 @@ int64x2_t test_vld1q_s64(int64_t const * a) {
 
 // CHECK-LABEL: @test_vld1q_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[VLD1]] to <8 x half>
-// CHECK:   ret <8 x half> [[TMP1]]
+// CHECK:   [[VLD1:%.*]] = call <8 x half> @llvm.arm.neon.vld1.v8f16.p0i8(i8* [[TMP0]], i32 2)
+// CHECK:   ret <8 x half> [[VLD1]]
 float16x8_t test_vld1q_f16(float16_t const * a) {
   return vld1q_f16(a);
 }
@@ -3990,9 +3989,8 @@ int64x1_t test_vld1_s64(int64_t const * a) {
 
 // CHECK-LABEL: @test_vld1_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
-// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VLD1]] to <4 x half>
-// CHECK:   ret <4 x half> [[TMP1]]
+// CHECK:   [[VLD1:%.*]] = call <4 x half> @llvm.arm.neon.vld1.v4f16.p0i8(i8* [[TMP0]], i32 2)
+// CHECK:   ret <4 x half> [[VLD1]]
 float16x4_t test_vld1_f16(float16_t const * a) {
   return vld1_f16(a);
 }
@@ -4106,12 +4104,11 @@ int64x2_t test_vld1q_dup_s64(int64_t const * a) {
 
 // CHECK-LABEL: @test_vld1q_dup_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
-// CHECK:   [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
-// CHECK:   [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half>
-// CHECK:   ret <8 x half> [[TMP4]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
+// CHECK:   [[TMP3:%.*]] = insertelement <8 x half> undef, half [[TMP2]], i32 0
+// CHECK:   [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
+// CHECK:   ret <8 x half> [[LANE]]
 float16x8_t test_vld1q_dup_f16(float16_t const * a) {
   return vld1q_dup_f16(a);
 }
@@ -4233,12 +4230,11 @@ int64x1_t test_vld1_dup_s64(int64_t const * a) {
 
 // CHECK-LABEL: @test_vld1_dup_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
-// CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
-// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half>
-// CHECK:   ret <4 x half> [[TMP4]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
+// CHECK:   [[TMP3:%.*]] = insertelement <4 x half> undef, half [[TMP2]], i32 0
+// CHECK:   [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK:   ret <4 x half> [[LANE]]
 float16x4_t test_vld1_dup_f16(float16_t const * a) {
   return vld1_dup_f16(a);
 }
@@ -4365,12 +4361,11 @@ int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
 // CHECK-LABEL: @test_vld1q_lane_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
-// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[VLD1_LANE]] to <8 x half>
-// CHECK:   ret <8 x half> [[TMP5]]
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
+// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
+// CHECK:   ret <8 x half> [[VLD1_LANE]]
 float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
   return vld1q_lane_f16(a, b, 7);
 }
@@ -4498,12 +4493,11 @@ int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
 // CHECK-LABEL: @test_vld1_lane_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
-// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
-// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[VLD1_LANE]] to <4 x half>
-// CHECK:   ret <4 x half> [[TMP5]]
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
+// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
+// CHECK:   ret <4 x half> [[VLD1_LANE]]
 float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
   return vld1_lane_f16(a, b, 3);
 }
@@ -4596,7 +4590,7 @@ int32x4x2_t test_vld2q_s32(int32_t const * a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
+// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x half>, <8 x half>
 float16x8x2_t test_vld2q_f16(float16_t const * a) {
   return vld2q_f16(a);
 }
@@ -4701,7 +4695,7 @@ int64x1x2_t test_vld2_s64(int64_t const * a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
+// CHECK:   [[VLD2_V:%.*]] = call { <4 x half>, <4 x half>
 float16x4x2_t test_vld2_f16(float16_t const * a) {
   return vld2_f16(a);
 }
@@ -4806,7 +4800,7 @@ int64x1x2_t test_vld2_dup_s64(int64_t const * a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>
+// CHECK:   [[VLD_DUP:%.*]] = call { <4 x half>, <4 x half>
 float16x4x2_t test_vld2_dup_f16(float16_t const * a) {
   return vld2_dup_f16(a);
 }
@@ -4965,9 +4959,9 @@ int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>
 float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
   return vld2q_lane_f16(a, b, 7);
 }
@@ -5198,9 +5192,9 @@ int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x half>, <4 x half>
 float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
   return vld2_lane_f16(a, b, 3);
 }
@@ -5337,7 +5331,7 @@ int32x4x3_t test_vld3q_s32(int32_t const * a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
 float16x8x3_t test_vld3q_f16(float16_t const * a) {
   return vld3q_f16(a);
 }
@@ -5442,7 +5436,7 @@ int64x1x3_t test_vld3_s64(int64_t const * a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK:   [[VLD3_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
 float16x4x3_t test_vld3_f16(float16_t const * a) {
   return vld3_f16(a);
 }
@@ -5547,7 +5541,7 @@ int64x1x3_t test_vld3_dup_s64(int64_t const * a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK:   [[VLD_DUP:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
 float16x4x3_t test_vld3_dup_f16(float16_t const * a) {
   return vld3_dup_f16(a);
 }
@@ -5730,10 +5724,10 @@ int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
+// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
 float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
   return vld3q_lane_f16(a, b, 7);
 }
@@ -6004,10 +5998,10 @@ int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
+// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
 float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
   return vld3_lane_f16(a, b, 3);
 }
@@ -6157,7 +6151,7 @@ int32x4x4_t test_vld4q_s32(int32_t const * a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
 float16x8x4_t test_vld4q_f16(float16_t const * a) {
   return vld4q_f16(a);
 }
@@ -6262,7 +6256,7 @@ int64x1x4_t test_vld4_s64(int64_t const * a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK:   [[VLD4_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
 float16x4x4_t test_vld4_f16(float16_t const * a) {
   return vld4_f16(a);
 }
@@ -6367,7 +6361,7 @@ int64x1x4_t test_vld4_dup_s64(int64_t const * a) {
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK:   [[VLD_DUP:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
 float16x4x4_t test_vld4_dup_f16(float16_t const * a) {
   return vld4_dup_f16(a);
 }
@@ -6574,11 +6568,11 @@ int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
 // CHECK:   [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
-// CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
+// CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x half>
+// CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
 float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
   return vld4q_lane_f16(a, b, 7);
 }
@@ -6889,11 +6883,11 @@ int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
 // CHECK:   [[TMP11:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
-// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
+// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
+// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
 float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
   return vld4_lane_f16(a, b, 3);
 }
@@ -15784,8 +15778,8 @@ void test_vst1q_s64(int64_t * a, int64x2_t b) {
 // CHECK-LABEL: @test_vst1q_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* [[TMP0]], <8 x half> [[TMP2]], i32 2)
 // CHECK:   ret void
 void test_vst1q_f16(float16_t * a, float16x8_t b) {
   vst1q_f16(a, b);
@@ -15895,8 +15889,8 @@ void test_vst1_s64(int64_t * a, int64x1_t b) {
 // CHECK-LABEL: @test_vst1_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* [[TMP0]], <4 x half> [[TMP2]], i32 2)
 // CHECK:   ret void
 void test_vst1_f16(float16_t * a, float16x4_t b) {
   vst1_f16(a, b);
@@ -16018,10 +16012,10 @@ void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
 // CHECK-LABEL: @test_vst1q_lane_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
+// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   store half [[TMP3]], half* [[TMP4]], align 2
 // CHECK:   ret void
 void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
   vst1q_lane_f16(a, b, 7);
@@ -16150,10 +16144,10 @@ void test_vst1_lane_s64(int64_t * a, int64x1_t b) {
 // CHECK-LABEL: @test_vst1_lane_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
+// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   store half [[TMP3]], half* [[TMP4]], align 2
 // CHECK:   ret void
 void test_vst1_lane_f16(float16_t * a, float16x4_t b) {
   vst1_lane_f16(a, b, 3);
@@ -16355,9 +16349,9 @@ void test_vst2q_s32(int32_t * a, int32x4x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
 // CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 2)
 // CHECK:   ret void
 void test_vst2q_f16(float16_t * a, float16x8x2_t b) {
   vst2q_f16(a, b);
@@ -16652,9 +16646,9 @@ void test_vst2_s64(int64_t * a, int64x1x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
 // CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 2)
 // CHECK:   ret void
 void test_vst2_f16(float16_t * a, float16x4x2_t b) {
   vst2_f16(a, b);
@@ -16855,9 +16849,9 @@ void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
 // CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 7, i32 2)
 // CHECK:   ret void
 void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) {
   vst2q_lane_f16(a, b, 7);
@@ -17079,9 +17073,9 @@ void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
 // CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 3, i32 2)
 // CHECK:   ret void
 void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
   vst2_lane_f16(a, b, 3);
@@ -17354,10 +17348,10 @@ void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
 // CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
+// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 2)
 // CHECK:   ret void
 void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
   vst3q_f16(a, b);
@@ -17705,10 +17699,10 @@ void test_vst3_s64(int64_t * a, int64x1x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
 // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
+// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 2)
 // CHECK:   ret void
 void test_vst3_f16(float16_t * a, float16x4x3_t b) {
   vst3_f16(a, b);
@@ -17946,10 +17940,10 @@ void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
 // CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
+// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 7, i32 2)
 // CHECK:   ret void
 void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) {
   vst3q_lane_f16(a, b, 7);
@@ -18211,10 +18205,10 @@ void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) {
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
 // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
+// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 3, i32 2)
 // CHECK:   ret void
 void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) {
   vst3_lane_f16(a, b, 3);
@@ -18530,11 +18524,11 @@ void test_vst4q_s32(int32_t * a, int32x4x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
 // CHECK:   [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
+// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 2)
 // CHECK:   ret void
 void test_vst4q_f16(float16_t * a, float16x8x4_t b) {
   vst4q_f16(a, b);
@@ -18935,11 +18929,11 @@ void test_vst4_s64(int64_t * a, int64x1x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
 // CHECK:   [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
+// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 2)
 // CHECK:   ret void
 void test_vst4_f16(float16_t * a, float16x4x4_t b) {
   vst4_f16(a, b);
@@ -19214,11 +19208,11 @@ void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
 // CHECK:   [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
+// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 7, i32 2)
 // CHECK:   ret void
 void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) {
   vst4q_lane_f16(a, b, 7);
@@ -19520,11 +19514,11 @@ void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) {
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
 // CHECK:   [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
+// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 3, i32 2)
 // CHECK:   ret void
 void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) {
   vst4_lane_f16(a, b, 3);
diff --git a/test/CodeGen/default-address-space.c b/test/CodeGen/default-address-space.c
index 07ddf48fac2fa..fc5f55ffd6f45 100644
--- a/test/CodeGen/default-address-space.c
+++ b/test/CodeGen/default-address-space.c
@@ -22,9 +22,10 @@ int *B;
 int test1() { return foo; }
 
 // COM-LABEL: define i32 @test2(i32 %i)
-// PIZ: load i32, i32 addrspace(4)*
+// COM: %[[addr:.*]] = getelementptr
+// PIZ: load i32, i32 addrspace(4)* %[[addr]]
 // PIZ-NEXT: ret i32
-// CHECK: load i32, i32*
+// CHECK: load i32, i32* %[[addr]]
 // CHECK-NEXT: ret i32
 int test2(int i) { return ban[i]; }
 
@@ -42,15 +43,17 @@ void test3() {
 }
 
 // PIZ-LABEL: define void @test4(i32 addrspace(4)* %a)
-// PIZ: %[[a_addr:.*]] = alloca i32 addrspace(4)*
-// PIZ: store i32 addrspace(4)* %a, i32 addrspace(4)** %[[a_addr]]
-// PIZ: %[[r0:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[a_addr]]
+// PIZ: %[[alloca:.*]] = alloca i32 addrspace(4)*
+// PIZ: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32 addrspace(4)* addrspace(4)*
+// PIZ: store i32 addrspace(4)* %a, i32 addrspace(4)* addrspace(4)* %[[a_addr]]
+// PIZ: %[[r0:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %[[a_addr]]
 // PIZ: %[[arrayidx:.*]] = getelementptr inbounds i32, i32 addrspace(4)* %[[r0]]
 // PIZ: store i32 0, i32 addrspace(4)* %[[arrayidx]]
 // CHECK-LABEL: define void @test4(i32* %a)
-// CHECK: %[[a_addr:.*]] = alloca i32*, align 4, addrspace(5)
-// CHECK: store i32* %a, i32* addrspace(5)* %[[a_addr]]
-// CHECK: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[a_addr]]
+// CHECK: %[[alloca:.*]] = alloca i32*, align 4, addrspace(5)
+// CHECK: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32**
+// CHECK: store i32* %a, i32** %[[a_addr]]
+// CHECK: %[[r0:.*]] = load i32*, i32** %[[a_addr]]
 // CHECK: %[[arrayidx:.*]] = getelementptr inbounds i32, i32* %[[r0]]
 // CHECK: store i32 0, i32* %[[arrayidx]]
 void test4(int *a) {
diff --git a/test/CodeGen/mcount.c b/test/CodeGen/mcount.c
index 7f915841952eb..2839d8ef6af33 100644
--- a/test/CodeGen/mcount.c
+++ b/test/CodeGen/mcount.c
@@ -1,18 +1,18 @@
 // RUN: %clang_cc1 -pg -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -pg -triple i386-unknown-unknown -emit-llvm -O2 -o - %s | FileCheck %s
-// RUN: %clang_cc1 -pg -triple powerpc-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple powerpc64-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple powerpc64le-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple i386-netbsd -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple x86_64-netbsd -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple arm-netbsd-eabi -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple aarch64-netbsd -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple mips-netbsd -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple powerpc-netbsd -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple powerpc64-netbsd -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple powerpc64le-netbsd -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple sparc-netbsd -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
-// RUN: %clang_cc1 -pg -triple sparc64-netbsd -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-PREFIXED %s
+// RUN: %clang_cc1 -pg -triple powerpc-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple powerpc64-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple powerpc64le-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple i386-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple x86_64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple arm-netbsd-eabi -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple aarch64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple mips-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple powerpc-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple powerpc64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple powerpc64le-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple sparc-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
+// RUN: %clang_cc1 -pg -triple sparc64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s
 // RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s -check-prefix=NO-MCOUNT
 
 int bar(void) {
@@ -23,10 +23,17 @@ int foo(void) {
   return bar();
 }
 
-int main(void) {
+int __attribute__((no_instrument_function)) no_instrument(void) {
   return foo();
 }
 
-// CHECK: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="mcount"{{.*}} }
-// CHECK-PREFIXED: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="_mcount"{{.*}} }
+int main(void) {
+  return no_instrument();
+}
+
+// CHECK: attributes #0 = { {{.*}}"counting-function"="mcount"{{.*}} }
+// CHECK: attributes #1 = { {{.*}} }
+// CHECK-PREFIXED: attributes #0 = { {{.*}}"counting-function"="_mcount"{{.*}} }
+// CHECK-PREFIXED: attributes #1 = { {{.*}} }
 // NO-MCOUNT-NOT: attributes #{{[0-9]}} = { {{.*}}"counting-function"={{.*}} }
+// NO-MCOUNT1-NOT: attributes #1 = { {{.*}}"counting-function"={{.*}} }
diff --git a/test/CodeGen/ms-inline-asm.c b/test/CodeGen/ms-inline-asm.c
index 5182d7f2e81ac..d26a660c9b0a6 100644
--- a/test/CodeGen/ms-inline-asm.c
+++ b/test/CodeGen/ms-inline-asm.c
@@ -627,6 +627,12 @@ void t43() {
 // CHECK: call void asm sideeffect inteldialect "mov eax, $0", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}})
 }
 
+void dot_operator(){
+// CHECK-LABEL: define void @dot_operator
+	__asm { mov eax, 3[ebx]A.b}
+// CHECK: call void asm sideeffect inteldialect "mov eax, $$3[ebx].4", "~{eax},~{dirflag},~{fpsr},~{flags}"
+}
+
 void call_clobber() {
   __asm call t41
   // CHECK-LABEL: define void @call_clobber
diff --git a/test/CodeGen/ms-intrinsics-other.c b/test/CodeGen/ms-intrinsics-other.c
new file mode 100644
index 0000000000000..d23bc7301801c
--- /dev/null
+++ b/test/CodeGen/ms-intrinsics-other.c
@@ -0,0 +1,161 @@
+// RUN: %clang_cc1 -ffreestanding -fms-extensions \
+// RUN:         -triple x86_64--darwin -Oz -emit-llvm %s -o - \
+// RUN:         | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding -fms-extensions \
+// RUN:         -triple x86_64--linux -Oz -emit-llvm %s -o - \
+// RUN:         | FileCheck %s
+
+// LP64 targets use 'long' as 'int' for MS intrinsics (-fms-extensions)
+#ifdef __LP64__
+#define LONG int
+#else
+#define LONG long
+#endif
+
+unsigned char test_BitScanForward(unsigned LONG *Index, unsigned LONG Mask) {
+  return _BitScanForward(Index, Mask);
+}
+// CHECK: define{{.*}}i8 @test_BitScanForward(i32* {{[a-z_ ]*}}%Index, i32 {{[a-z_ ]*}}%Mask){{.*}}{
+// CHECK:   [[ISNOTZERO:%[a-z0-9._]+]] = icmp eq i32 %Mask, 0
+// CHECK:   br i1 [[ISNOTZERO]], label %[[END_LABEL:[a-z0-9._]+]], label %[[ISNOTZERO_LABEL:[a-z0-9._]+]]
+// CHECK:   [[END_LABEL]]:
+// CHECK:   [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ]
+// CHECK:   ret i8 [[RESULT]]
+// CHECK:   [[ISNOTZERO_LABEL]]:
+// CHECK:   [[INDEX:%[0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %Mask, i1 true)
+// CHECK:   store i32 [[INDEX]], i32* %Index, align 4
+// CHECK:   br label %[[END_LABEL]]
+
+unsigned char test_BitScanReverse(unsigned LONG *Index, unsigned LONG Mask) {
+  return _BitScanReverse(Index, Mask);
+}
+// CHECK: define{{.*}}i8 @test_BitScanReverse(i32* {{[a-z_ ]*}}%Index, i32 {{[a-z_ ]*}}%Mask){{.*}}{
+// CHECK:   [[ISNOTZERO:%[0-9]+]] = icmp eq i32 %Mask, 0
+// CHECK:   br i1 [[ISNOTZERO]], label %[[END_LABEL:[a-z0-9._]+]], label %[[ISNOTZERO_LABEL:[a-z0-9._]+]]
+// CHECK:   [[END_LABEL]]:
+// CHECK:   [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ]
+// CHECK:   ret i8 [[RESULT]]
+// CHECK:   [[ISNOTZERO_LABEL]]:
+// CHECK:   [[REVINDEX:%[0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %Mask, i1 true)
+// CHECK:   [[INDEX:%[0-9]+]] = xor i32 [[REVINDEX]], 31
+// CHECK:   store i32 [[INDEX]], i32* %Index, align 4
+// CHECK:   br label %[[END_LABEL]]
+
+#if defined(__x86_64__)
+unsigned char test_BitScanForward64(unsigned LONG *Index, unsigned __int64 Mask) {
+  return _BitScanForward64(Index, Mask);
+}
+// CHECK: define{{.*}}i8 @test_BitScanForward64(i32* {{[a-z_ ]*}}%Index, i64 {{[a-z_ ]*}}%Mask){{.*}}{
+// CHECK:   [[ISNOTZERO:%[a-z0-9._]+]] = icmp eq i64 %Mask, 0
+// CHECK:   br i1 [[ISNOTZERO]], label %[[END_LABEL:[a-z0-9._]+]], label %[[ISNOTZERO_LABEL:[a-z0-9._]+]]
+// CHECK:   [[END_LABEL]]:
+// CHECK:   [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ]
+// CHECK:   ret i8 [[RESULT]]
+// CHECK:   [[ISNOTZERO_LABEL]]:
+// CHECK:   [[INDEX:%[0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %Mask, i1 true)
+// CHECK:   [[TRUNC_INDEX:%[0-9]+]] = trunc i64 [[INDEX]] to i32
+// CHECK:   store i32 [[TRUNC_INDEX]], i32* %Index, align 4
+// CHECK:   br label %[[END_LABEL]]
+
+unsigned char test_BitScanReverse64(unsigned LONG *Index, unsigned __int64 Mask) {
+  return _BitScanReverse64(Index, Mask);
+}
+// CHECK: define{{.*}}i8 @test_BitScanReverse64(i32* {{[a-z_ ]*}}%Index, i64 {{[a-z_ ]*}}%Mask){{.*}}{
+// CHECK:   [[ISNOTZERO:%[0-9]+]] = icmp eq i64 %Mask, 0
+// CHECK:   br i1 [[ISNOTZERO]], label %[[END_LABEL:[a-z0-9._]+]], label %[[ISNOTZERO_LABEL:[a-z0-9._]+]]
+// CHECK:   [[END_LABEL]]:
+// CHECK:   [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ]
+// CHECK:   ret i8 [[RESULT]]
+// CHECK:   [[ISNOTZERO_LABEL]]:
+// CHECK:   [[REVINDEX:%[0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %Mask, i1 true)
+// CHECK:   [[TRUNC_REVINDEX:%[0-9]+]] = trunc i64 [[REVINDEX]] to i32
+// CHECK:   [[INDEX:%[0-9]+]] = xor i32 [[TRUNC_REVINDEX]], 63
+// CHECK:   store i32 [[INDEX]], i32* %Index, align 4
+// CHECK:   br label %[[END_LABEL]]
+#endif
+
+LONG test_InterlockedExchange(LONG volatile *value, LONG mask) {
+  return _InterlockedExchange(value, mask);
+}
+// CHECK: define{{.*}}i32 @test_InterlockedExchange(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask seq_cst
+// CHECK:   ret i32 [[RESULT:%[0-9]+]]
+// CHECK: }
+
+LONG test_InterlockedExchangeAdd(LONG volatile *value, LONG mask) {
+  return _InterlockedExchangeAdd(value, mask);
+}
+// CHECK: define{{.*}}i32 @test_InterlockedExchangeAdd(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask seq_cst
+// CHECK:   ret i32 [[RESULT:%[0-9]+]]
+// CHECK: }
+
+LONG test_InterlockedExchangeSub(LONG volatile *value, LONG mask) {
+  return _InterlockedExchangeSub(value, mask);
+}
+// CHECK: define{{.*}}i32 @test_InterlockedExchangeSub(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i32* %value, i32 %mask seq_cst
+// CHECK:   ret i32 [[RESULT:%[0-9]+]]
+// CHECK: }
+
+LONG test_InterlockedOr(LONG volatile *value, LONG mask) {
+  return _InterlockedOr(value, mask);
+}
+// CHECK: define{{.*}}i32 @test_InterlockedOr(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask seq_cst
+// CHECK:   ret i32 [[RESULT:%[0-9]+]]
+// CHECK: }
+
+LONG test_InterlockedXor(LONG volatile *value, LONG mask) {
+  return _InterlockedXor(value, mask);
+}
+// CHECK: define{{.*}}i32 @test_InterlockedXor(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask seq_cst
+// CHECK:   ret i32 [[RESULT:%[0-9]+]]
+// CHECK: }
+
+LONG test_InterlockedAnd(LONG volatile *value, LONG mask) {
+  return _InterlockedAnd(value, mask);
+}
+// CHECK: define{{.*}}i32 @test_InterlockedAnd(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask seq_cst
+// CHECK:   ret i32 [[RESULT:%[0-9]+]]
+// CHECK: }
+
+LONG test_InterlockedCompareExchange(LONG volatile *Destination, LONG Exchange, LONG Comperand) {
+  return _InterlockedCompareExchange(Destination, Exchange, Comperand);
+}
+// CHECK: define{{.*}}i32 @test_InterlockedCompareExchange(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comperand){{.*}}{
+// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange seq_cst seq_cst
+// CHECK: [[RESULT:%[0-9]+]] = extractvalue { i32, i1 } [[TMP]], 0
+// CHECK: ret i32 [[RESULT]]
+// CHECK: }
+
+LONG test_InterlockedIncrement(LONG volatile *Addend) {
+  return _InterlockedIncrement(Addend);
+}
+// CHECK: define{{.*}}i32 @test_InterlockedIncrement(i32*{{[a-z_ ]*}}%Addend){{.*}}{
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 seq_cst
+// CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
+// CHECK: ret i32 [[RESULT]]
+// CHECK: }
+
+LONG test_InterlockedDecrement(LONG volatile *Addend) {
+  return _InterlockedDecrement(Addend);
+}
+// CHECK: define{{.*}}i32 @test_InterlockedDecrement(i32*{{[a-z_ ]*}}%Addend){{.*}}{
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 seq_cst
+// CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
+// CHECK: ret i32 [[RESULT]]
+// CHECK: }
+
+unsigned char test_interlockedbittestandset(volatile LONG *ptr, LONG bit) {
+  return _interlockedbittestandset(ptr, bit);
+}
+// CHECK-LABEL: define{{.*}} i8 @test_interlockedbittestandset
+// CHECK: [[MASKBIT:%[0-9]+]] = shl i32 1, %bit
+// CHECK: [[OLD:%[0-9]+]] = atomicrmw or i32* %ptr, i32 [[MASKBIT]] seq_cst
+// CHECK: [[SHIFT:%[0-9]+]] = lshr i32 [[OLD]], %bit
+// CHECK: [[TRUNC:%[0-9]+]] = trunc i32 [[SHIFT]] to i8
+// CHECK: [[AND:%[0-9]+]] = and i8 [[TRUNC]], 1
+// CHECK: ret i8 [[AND]]
diff --git a/test/CodeGen/ms-intrinsics-rotations.c b/test/CodeGen/ms-intrinsics-rotations.c
index 65d25cbe85eb9..9533e6c3c6a22 100644
--- a/test/CodeGen/ms-intrinsics-rotations.c
+++ b/test/CodeGen/ms-intrinsics-rotations.c
@@ -1,181 +1,169 @@
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
 // RUN:         -triple i686--windows -emit-llvm %s -o - \
-// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
 // RUN:         -triple thumbv7--windows -emit-llvm %s -o - \
-// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
 // RUN:         -triple x86_64--windows -emit-llvm %s -o - \
-// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
 // RUN:         -triple i686--linux -emit-llvm %s -o - \
-// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
 // RUN:         -triple x86_64--linux -emit-llvm %s -o - \
-// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
-
-// rotate left
-
-unsigned char test_rotl8(unsigned char value, unsigned char shift) {
-  return _rotl8(value, shift);
-}
-// CHECK: i8 @test_rotl8
-// CHECK:   [[SHIFT:%[0-9]+]] = and i8 %{{[0-9]+}}, 7
-// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i8 8, [[SHIFT]]
-// CHECK:   [[HIGH:%[0-9]+]] = shl i8 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK:   [[LOW:%[0-9]+]] = lshr i8 [[VALUE]], [[NEGSHIFT]]
-// CHECK:   [[ROTATED:%[0-9]+]] = or i8 [[HIGH]], [[LOW]]
-// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i8 [[SHIFT]], 0
-// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i8 [[VALUE]], i8 [[ROTATED]]
-// CHECK:   ret i8 [[RESULT]]
-// CHECK  }
-
-unsigned short test_rotl16(unsigned short value, unsigned char shift) {
-  return _rotl16(value, shift);
-}
-// CHECK: i16 @test_rotl16
-// CHECK:   [[SHIFT:%[0-9]+]] = and i16 %{{[0-9]+}}, 15
-// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i16 16, [[SHIFT]]
-// CHECK:   [[HIGH:%[0-9]+]] = shl i16 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK:   [[LOW:%[0-9]+]] = lshr i16 [[VALUE]], [[NEGSHIFT]]
-// CHECK:   [[ROTATED:%[0-9]+]] = or i16 [[HIGH]], [[LOW]]
-// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i16 [[SHIFT]], 0
-// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i16 [[VALUE]], i16 [[ROTATED]]
-// CHECK:   ret i16 [[RESULT]]
-// CHECK  }
-
-unsigned int test_rotl(unsigned int value, int shift) {
-  return _rotl(value, shift);
-}
-// CHECK: i32 @test_rotl
-// CHECK:   [[SHIFT:%[0-9]+]] = and i32 %{{[0-9]+}}, 31
-// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i32 32, [[SHIFT]]
-// CHECK:   [[HIGH:%[0-9]+]] = shl i32 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK:   [[LOW:%[0-9]+]] = lshr i32 [[VALUE]], [[NEGSHIFT]]
-// CHECK:   [[ROTATED:%[0-9]+]] = or i32 [[HIGH]], [[LOW]]
-// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i32 [[SHIFT]], 0
-// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i32 [[VALUE]], i32 [[ROTATED]]
-// CHECK:   ret i32 [[RESULT]]
-// CHECK  }
-
-unsigned long test_lrotl(unsigned long value, int shift) {
-  return _lrotl(value, shift);
-}
-// CHECK-32BIT-LONG: i32 @test_lrotl
-// CHECK-32BIT-LONG:   [[SHIFT:%[0-9]+]] = and i32 %{{[0-9]+}}, 31
-// CHECK-32BIT-LONG:   [[NEGSHIFT:%[0-9]+]] = sub i32 32, [[SHIFT]]
-// CHECK-32BIT-LONG:   [[HIGH:%[0-9]+]] = shl i32 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK-32BIT-LONG:   [[LOW:%[0-9]+]] = lshr i32 [[VALUE]], [[NEGSHIFT]]
-// CHECK-32BIT-LONG:   [[ROTATED:%[0-9]+]] = or i32 [[HIGH]], [[LOW]]
-// CHECK-32BIT-LONG:   [[ISZERO:%[0-9]+]] = icmp eq i32 [[SHIFT]], 0
-// CHECK-32BIT-LONG:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i32 [[VALUE]], i32 [[ROTATED]]
-// CHECK-32BIT-LONG:   ret i32 [[RESULT]]
-// CHECK-32BIT-LONG  }
-
-// CHECK-64BIT-LONG: i64 @test_lrotl
-// CHECK-64BIT-LONG:   [[SHIFT:%[0-9]+]] = and i64 %{{[0-9]+}}, 63
-// CHECK-64BIT-LONG:   [[NEGSHIFT:%[0-9]+]] = sub i64 64, [[SHIFT]]
-// CHECK-64BIT-LONG:   [[HIGH:%[0-9]+]] = shl i64 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK-64BIT-LONG:   [[LOW:%[0-9]+]] = lshr i64 [[VALUE]], [[NEGSHIFT]]
-// CHECK-64BIT-LONG:   [[ROTATED:%[0-9]+]] = or i64 [[HIGH]], [[LOW]]
-// CHECK-64BIT-LONG:   [[ISZERO:%[0-9]+]] = icmp eq i64 [[SHIFT]], 0
-// CHECK-64BIT-LONG:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i64 [[VALUE]], i64 [[ROTATED]]
-// CHECK-64BIT-LONG:   ret i64 [[RESULT]]
-// CHECK-64BIT-LONG  }
-
-unsigned __int64 test_rotl64(unsigned __int64 value, int shift) {
-  return _rotl64(value, shift);
-}
-// CHECK: i64 @test_rotl64
-// CHECK:   [[SHIFT:%[0-9]+]] = and i64 %{{[0-9]+}}, 63
-// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i64 64, [[SHIFT]]
-// CHECK:   [[HIGH:%[0-9]+]] = shl i64 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK:   [[LOW:%[0-9]+]] = lshr i64 [[VALUE]], [[NEGSHIFT]]
-// CHECK:   [[ROTATED:%[0-9]+]] = or i64 [[HIGH]], [[LOW]]
-// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i64 [[SHIFT]], 0
-// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i64 [[VALUE]], i64 [[ROTATED]]
-// CHECK:   ret i64 [[RESULT]]
-// CHECK  }
-
-// rotate right
-
-unsigned char test_rotr8(unsigned char value, unsigned char shift) {
-  return _rotr8(value, shift);
-}
-// CHECK: i8 @test_rotr8
-// CHECK:   [[SHIFT:%[0-9]+]] = and i8 %{{[0-9]+}}, 7
-// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i8 8, [[SHIFT]]
-// CHECK:   [[LOW:%[0-9]+]] = lshr i8 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK:   [[HIGH:%[0-9]+]] = shl i8 [[VALUE]], [[NEGSHIFT]]
-// CHECK:   [[ROTATED:%[0-9]+]] = or i8 [[HIGH]], [[LOW]]
-// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i8 [[SHIFT]], 0
-// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i8 [[VALUE]], i8 [[ROTATED]]
-// CHECK:   ret i8 [[RESULT]]
-// CHECK  }
-
-unsigned short test_rotr16(unsigned short value, unsigned char shift) {
-  return _rotr16(value, shift);
-}
-// CHECK: i16 @test_rotr16
-// CHECK:   [[SHIFT:%[0-9]+]] = and i16 %{{[0-9]+}}, 15
-// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i16 16, [[SHIFT]]
-// CHECK:   [[LOW:%[0-9]+]] = lshr i16 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK:   [[HIGH:%[0-9]+]] = shl i16 [[VALUE]], [[NEGSHIFT]]
-// CHECK:   [[ROTATED:%[0-9]+]] = or i16 [[HIGH]], [[LOW]]
-// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i16 [[SHIFT]], 0
-// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i16 [[VALUE]], i16 [[ROTATED]]
-// CHECK:   ret i16 [[RESULT]]
-// CHECK  }
-
-unsigned int test_rotr(unsigned int value, int shift) {
-  return _rotr(value, shift);
-}
-// CHECK: i32 @test_rotr
-// CHECK:   [[SHIFT:%[0-9]+]] = and i32 %{{[0-9]+}}, 31
-// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i32 32, [[SHIFT]]
-// CHECK:   [[LOW:%[0-9]+]] = lshr i32 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK:   [[HIGH:%[0-9]+]] = shl i32 [[VALUE]], [[NEGSHIFT]]
-// CHECK:   [[ROTATED:%[0-9]+]] = or i32 [[HIGH]], [[LOW]]
-// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i32 [[SHIFT]], 0
-// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i32 [[VALUE]], i32 [[ROTATED]]
-// CHECK:   ret i32 [[RESULT]]
-// CHECK  }
-
-unsigned long test_lrotr(unsigned long value, int shift) {
-  return _lrotr(value, shift);
-}
-// CHECK-32BIT-LONG: i32 @test_lrotr
-// CHECK-32BIT-LONG:   [[SHIFT:%[0-9]+]] = and i32 %{{[0-9]+}}, 31
-// CHECK-32BIT-LONG:   [[NEGSHIFT:%[0-9]+]] = sub i32 32, [[SHIFT]]
-// CHECK-32BIT-LONG:   [[LOW:%[0-9]+]] = lshr i32 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK-32BIT-LONG:   [[HIGH:%[0-9]+]] = shl i32 [[VALUE]], [[NEGSHIFT]]
-// CHECK-32BIT-LONG:   [[ROTATED:%[0-9]+]] = or i32 [[HIGH]], [[LOW]]
-// CHECK-32BIT-LONG:   [[ISZERO:%[0-9]+]] = icmp eq i32 [[SHIFT]], 0
-// CHECK-32BIT-LONG:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i32 [[VALUE]], i32 [[ROTATED]]
-// CHECK-32BIT-LONG:   ret i32 [[RESULT]]
-// CHECK-32BIT-LONG  }
-
-// CHECK-64BIT-LONG: i64 @test_lrotr
-// CHECK-64BIT-LONG:   [[SHIFT:%[0-9]+]] = and i64 %{{[0-9]+}}, 63
-// CHECK-64BIT-LONG:   [[NEGSHIFT:%[0-9]+]] = sub i64 64, [[SHIFT]]
-// CHECK-64BIT-LONG:   [[LOW:%[0-9]+]] = lshr i64 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK-64BIT-LONG:   [[HIGH:%[0-9]+]] = shl i64 [[VALUE]], [[NEGSHIFT]]
-// CHECK-64BIT-LONG:   [[ROTATED:%[0-9]+]] = or i64 [[HIGH]], [[LOW]]
-// CHECK-64BIT-LONG:   [[ISZERO:%[0-9]+]] = icmp eq i64 [[SHIFT]], 0
-// CHECK-64BIT-LONG:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i64 [[VALUE]], i64 [[ROTATED]]
-// CHECK-64BIT-LONG:   ret i64 [[RESULT]]
-// CHECK-64BIT-LONG  }
-
-unsigned __int64 test_rotr64(unsigned __int64 value, int shift) {
-  return _rotr64(value, shift);
-}
-// CHECK: i64 @test_rotr64
-// CHECK:   [[SHIFT:%[0-9]+]] = and i64 %{{[0-9]+}}, 63
-// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i64 64, [[SHIFT]]
-// CHECK:   [[LOW:%[0-9]+]] = lshr i64 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK:   [[HIGH:%[0-9]+]] = shl i64 [[VALUE]], [[NEGSHIFT]]
-// CHECK:   [[ROTATED:%[0-9]+]] = or i64 [[HIGH]], [[LOW]]
-// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i64 [[SHIFT]], 0
-// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i64 [[VALUE]], i64 [[ROTATED]]
-// CHECK:   ret i64 [[RESULT]]
-// CHECK  }
+// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -ffreestanding -fms-extensions \
+// RUN:         -triple x86_64--darwin -emit-llvm %s -o - \
+// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+
+// LP64 targets use 'long' as 'int' for MS intrinsics (-fms-extensions)
+#ifdef __LP64__
+#define LONG int
+#else
+#define LONG long
+#endif
+
+// rotate left
+
+unsigned char test_rotl8(unsigned char value, unsigned char shift) {
+  return _rotl8(value, shift);
+}
+// CHECK: i8 @test_rotl8
+// CHECK:   [[SHIFT:%[0-9]+]] = and i8 %{{[0-9]+}}, 7
+// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i8 8, [[SHIFT]]
+// CHECK:   [[HIGH:%[0-9]+]] = shl i8 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK:   [[LOW:%[0-9]+]] = lshr i8 [[VALUE]], [[NEGSHIFT]]
+// CHECK:   [[ROTATED:%[0-9]+]] = or i8 [[HIGH]], [[LOW]]
+// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i8 [[SHIFT]], 0
+// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i8 [[VALUE]], i8 [[ROTATED]]
+// CHECK:   ret i8 [[RESULT]]
+// CHECK  }
+
+unsigned short test_rotl16(unsigned short value, unsigned char shift) {
+  return _rotl16(value, shift);
+}
+// CHECK: i16 @test_rotl16
+// CHECK:   [[SHIFT:%[0-9]+]] = and i16 %{{[0-9]+}}, 15
+// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i16 16, [[SHIFT]]
+// CHECK:   [[HIGH:%[0-9]+]] = shl i16 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK:   [[LOW:%[0-9]+]] = lshr i16 [[VALUE]], [[NEGSHIFT]]
+// CHECK:   [[ROTATED:%[0-9]+]] = or i16 [[HIGH]], [[LOW]]
+// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i16 [[SHIFT]], 0
+// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i16 [[VALUE]], i16 [[ROTATED]]
+// CHECK:   ret i16 [[RESULT]]
+// CHECK  }
+
+unsigned int test_rotl(unsigned int value, int shift) {
+  return _rotl(value, shift);
+}
+// CHECK: i32 @test_rotl
+// CHECK:   [[SHIFT:%[0-9]+]] = and i32 %{{[0-9]+}}, 31
+// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i32 32, [[SHIFT]]
+// CHECK:   [[HIGH:%[0-9]+]] = shl i32 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK:   [[LOW:%[0-9]+]] = lshr i32 [[VALUE]], [[NEGSHIFT]]
+// CHECK:   [[ROTATED:%[0-9]+]] = or i32 [[HIGH]], [[LOW]]
+// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i32 [[SHIFT]], 0
+// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i32 [[VALUE]], i32 [[ROTATED]]
+// CHECK:   ret i32 [[RESULT]]
+// CHECK  }
+
+unsigned LONG test_lrotl(unsigned LONG value, int shift) {
+  return _lrotl(value, shift);
+}
+// CHECK-32BIT-LONG: i32 @test_lrotl
+// CHECK-32BIT-LONG:   [[SHIFT:%[0-9]+]] = and i32 %{{[0-9]+}}, 31
+// CHECK-32BIT-LONG:   [[NEGSHIFT:%[0-9]+]] = sub i32 32, [[SHIFT]]
+// CHECK-32BIT-LONG:   [[HIGH:%[0-9]+]] = shl i32 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK-32BIT-LONG:   [[LOW:%[0-9]+]] = lshr i32 [[VALUE]], [[NEGSHIFT]]
+// CHECK-32BIT-LONG:   [[ROTATED:%[0-9]+]] = or i32 [[HIGH]], [[LOW]]
+// CHECK-32BIT-LONG:   [[ISZERO:%[0-9]+]] = icmp eq i32 [[SHIFT]], 0
+// CHECK-32BIT-LONG:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i32 [[VALUE]], i32 [[ROTATED]]
+// CHECK-32BIT-LONG:   ret i32 [[RESULT]]
+// CHECK-32BIT-LONG  }
+
+unsigned __int64 test_rotl64(unsigned __int64 value, int shift) {
+  return _rotl64(value, shift);
+}
+// CHECK: i64 @test_rotl64
+// CHECK:   [[SHIFT:%[0-9]+]] = and i64 %{{[0-9]+}}, 63
+// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i64 64, [[SHIFT]]
+// CHECK:   [[HIGH:%[0-9]+]] = shl i64 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK:   [[LOW:%[0-9]+]] = lshr i64 [[VALUE]], [[NEGSHIFT]]
+// CHECK:   [[ROTATED:%[0-9]+]] = or i64 [[HIGH]], [[LOW]]
+// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i64 [[SHIFT]], 0
+// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i64 [[VALUE]], i64 [[ROTATED]]
+// CHECK:   ret i64 [[RESULT]]
+// CHECK  }
+
+// rotate right
+
+unsigned char test_rotr8(unsigned char value, unsigned char shift) {
+  return _rotr8(value, shift);
+}
+// CHECK: i8 @test_rotr8
+// CHECK:   [[SHIFT:%[0-9]+]] = and i8 %{{[0-9]+}}, 7
+// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i8 8, [[SHIFT]]
+// CHECK:   [[LOW:%[0-9]+]] = lshr i8 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK:   [[HIGH:%[0-9]+]] = shl i8 [[VALUE]], [[NEGSHIFT]]
+// CHECK:   [[ROTATED:%[0-9]+]] = or i8 [[HIGH]], [[LOW]]
+// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i8 [[SHIFT]], 0
+// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i8 [[VALUE]], i8 [[ROTATED]]
+// CHECK:   ret i8 [[RESULT]]
+// CHECK  }
+
+unsigned short test_rotr16(unsigned short value, unsigned char shift) {
+  return _rotr16(value, shift);
+}
+// CHECK: i16 @test_rotr16
+// CHECK:   [[SHIFT:%[0-9]+]] = and i16 %{{[0-9]+}}, 15
+// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i16 16, [[SHIFT]]
+// CHECK:   [[LOW:%[0-9]+]] = lshr i16 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK:   [[HIGH:%[0-9]+]] = shl i16 [[VALUE]], [[NEGSHIFT]]
+// CHECK:   [[ROTATED:%[0-9]+]] = or i16 [[HIGH]], [[LOW]]
+// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i16 [[SHIFT]], 0
+// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i16 [[VALUE]], i16 [[ROTATED]]
+// CHECK:   ret i16 [[RESULT]]
+// CHECK  }
+
+unsigned int test_rotr(unsigned int value, int shift) {
+  return _rotr(value, shift);
+}
+// CHECK: i32 @test_rotr
+// CHECK:   [[SHIFT:%[0-9]+]] = and i32 %{{[0-9]+}}, 31
+// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i32 32, [[SHIFT]]
+// CHECK:   [[LOW:%[0-9]+]] = lshr i32 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK:   [[HIGH:%[0-9]+]] = shl i32 [[VALUE]], [[NEGSHIFT]]
+// CHECK:   [[ROTATED:%[0-9]+]] = or i32 [[HIGH]], [[LOW]]
+// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i32 [[SHIFT]], 0
+// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i32 [[VALUE]], i32 [[ROTATED]]
+// CHECK:   ret i32 [[RESULT]]
+// CHECK  }
+
+unsigned LONG test_lrotr(unsigned LONG value, int shift) {
+  return _lrotr(value, shift);
+}
+// CHECK-32BIT-LONG: i32 @test_lrotr
+// CHECK-32BIT-LONG:   [[SHIFT:%[0-9]+]] = and i32 %{{[0-9]+}}, 31
+// CHECK-32BIT-LONG:   [[NEGSHIFT:%[0-9]+]] = sub i32 32, [[SHIFT]]
+// CHECK-32BIT-LONG:   [[LOW:%[0-9]+]] = lshr i32 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK-32BIT-LONG:   [[HIGH:%[0-9]+]] = shl i32 [[VALUE]], [[NEGSHIFT]]
+// CHECK-32BIT-LONG:   [[ROTATED:%[0-9]+]] = or i32 [[HIGH]], [[LOW]]
+// CHECK-32BIT-LONG:   [[ISZERO:%[0-9]+]] = icmp eq i32 [[SHIFT]], 0
+// CHECK-32BIT-LONG:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i32 [[VALUE]], i32 [[ROTATED]]
+// CHECK-32BIT-LONG:   ret i32 [[RESULT]]
+// CHECK-32BIT-LONG  }
+
+unsigned __int64 test_rotr64(unsigned __int64 value, int shift) {
+  return _rotr64(value, shift);
+}
+// CHECK: i64 @test_rotr64
+// CHECK:   [[SHIFT:%[0-9]+]] = and i64 %{{[0-9]+}}, 63
+// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i64 64, [[SHIFT]]
+// CHECK:   [[LOW:%[0-9]+]] = lshr i64 [[VALUE:%[0-9]+]], [[SHIFT]]
+// CHECK:   [[HIGH:%[0-9]+]] = shl i64 [[VALUE]], [[NEGSHIFT]]
+// CHECK:   [[ROTATED:%[0-9]+]] = or i64 [[HIGH]], [[LOW]]
+// CHECK:   [[ISZERO:%[0-9]+]] = icmp eq i64 [[SHIFT]], 0
+// CHECK:   [[RESULT:%[0-9]+]] = select i1 [[ISZERO]], i64 [[VALUE]], i64 [[ROTATED]]
+// CHECK:   ret i64 [[RESULT]]
+// CHECK  }
diff --git a/test/CodeGen/no-devirt.cpp b/test/CodeGen/no-devirt.cpp
new file mode 100644
index 0000000000000..4333b7cde7c6e
--- /dev/null
+++ b/test/CodeGen/no-devirt.cpp
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 %s -DUSEIT -triple %itanium_abi_triple -emit-llvm -o - |  FileCheck %s
+
+// Test with decls and template defs in pch, and just use in .cpp
+// RUN:  %clang_cc1 %s -DTMPL_DEF_IN_HEADER -triple %itanium_abi_triple -emit-pch -o %t
+// RUN:  %clang_cc1 %s -DTMPL_DEF_IN_HEADER -DUSEIT -triple %itanium_abi_triple -include-pch %t -emit-llvm -o - | FileCheck %s
+
+// Test with A in pch, and B and C in main
+// Test with just decls in pch, and template defs and use in .cpp
+// RUN:  %clang_cc1 %s -triple %itanium_abi_triple -emit-pch -o %t
+// RUN:  %clang_cc1 %s -DUSEIT -triple %itanium_abi_triple -include-pch %t -emit-llvm -o - | FileCheck %s
+
+#ifndef HEADER
+#define HEADER
+template < typename T, int N = 0 > class TmplWithArray {
+public:
+  virtual T& operator [] (int idx);
+  virtual T& func1 (int idx);
+  virtual T& func2 (int idx);
+  T ar[N+1];
+};
+struct Wrapper {
+  TmplWithArray<bool, 10> data;
+  bool indexIt(int a) {
+    if (a > 6) return data[a] ;      // Should not devirtualize
+    if (a > 4) return data.func1(a); // Should devirtualize
+    return data.func2(a);            // Should devirtualize
+  }
+};
+
+#ifdef TMPL_DEF_IN_HEADER
+template <typename T, int N> T& TmplWithArray<T, N >::operator[](int idx) {
+  return ar[idx];
+}
+template <typename T, int N> T& TmplWithArray<T, N >::func1(int idx) {
+  return ar[idx];
+}
+#endif // TMPL_DEF_IN_HEADER
+#endif // HEADER
+
+#ifdef USEIT
+#ifndef TMPL_DEF_IN_HEADER
+template <typename T, int N> T& TmplWithArray<T, N >::operator[](int idx) {
+  return ar[idx];
+}
+template <typename T, int N> T& TmplWithArray<T, N >::func1(int idx) {
+  return ar[idx];
+}
+#endif // !TMPL_DEF_IN_HEADER
+extern Wrapper ew;
+bool stuff(int p)
+{
+  return ew.indexIt(p);
+}
+#endif
+
+// CHECK-NOT: call {{.*}} @_ZN13TmplWithArrayIbLi10EEixEi
+// CHECK-DAG: call {{.*}} @_ZN13TmplWithArrayIbLi10EE5func1Ei
+// CHECK-DAG: call {{.*}} @_ZN13TmplWithArrayIbLi10EE5func2Ei
+
diff --git a/test/CodeGen/pr27892.c b/test/CodeGen/pr27892.c
deleted file mode 100644
index 57722c4671ab2..0000000000000
--- a/test/CodeGen/pr27892.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -fms-extensions %s -emit-llvm -o - | FileCheck %s
-
-long test1(long *p) {
-  return _InterlockedIncrement(p);
-}
-// CHECK-DAG: define i64 @test1(
-// CHECK:   %[[p_addr:.*]] = alloca i64*, align 8
-// CHECK:   store i64* %p, i64** %[[p_addr]], align 8
-// CHECK:   %[[p_load:.*]] = load i64*, i64** %[[p_addr]], align 8
-// CHECK:   %[[atomic_add:.*]] = atomicrmw add i64* %[[p_load]], i64 1 seq_cst
-// CHECK:   %[[res:.*]] = add i64 %[[atomic_add]], 1
-// CHECK:   ret i64 %[[res]]
-
-long test2(long *p) {
-  return _InterlockedDecrement(p);
-}
-// CHECK-DAG: define i64 @test2(
-// CHECK:   %[[p_addr:.*]] = alloca i64*, align 8
-// CHECK:   store i64* %p, i64** %[[p_addr]], align 8
-// CHECK:   %[[p_load:.*]] = load i64*, i64** %[[p_addr]], align 8
-// CHECK:   %[[atomic_sub:.*]] = atomicrmw sub i64* %[[p_load]], i64 1 seq_cst
-// CHECK:   %[[res:.*]] = sub i64 %[[atomic_sub]], 1
-// CHECK:   ret i64 %[[res]]
diff --git a/test/CodeGen/target-data.c b/test/CodeGen/target-data.c
index 1e8ce6a2fd12f..68ee8f02d2eee 100644
--- a/test/CodeGen/target-data.c
+++ b/test/CodeGen/target-data.c
@@ -175,7 +175,7 @@
 
 // RUN: %clang_cc1 -triple msp430-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=MSP430
-// MSP430: target datalayout = "e-m:e-p:16:16-i32:16:32-a:16-n8:16"
+// MSP430: target datalayout = "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16"
 
 // RUN: %clang_cc1 -triple tce-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=TCE
diff --git a/test/CodeGen/vectorcall.c b/test/CodeGen/vectorcall.c
index 167f72ca2cfda..fa244fb908e05 100644
--- a/test/CodeGen/vectorcall.c
+++ b/test/CodeGen/vectorcall.c
@@ -100,8 +100,19 @@ void __vectorcall odd_size_hva(struct OddSizeHVA a) {}
 // X32: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
 // X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
 
-// The Vectorcall ABI only allows passing the first 6 items in registers, so this shouldn't 
+// The Vectorcall ABI only allows passing the first 6 items in registers in x64, so this shouldn't
 // consider 'p7' as a register.  Instead p5 gets put into the register on the second pass.
-struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7){ return p1;}
-// X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@80"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7)
-// X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@96"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7)
+// x86 should pass p2, p6 and p7 in registers, then p1 in the second pass.
+struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7, int p8){ return p1;}
+// X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@84"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2* %p5, float %p6, float %p7, i32 %p8)
+// X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@104"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7, i32 %p8)
+
+// Vectorcall in both architectures allows passing of an HVA as long as there is room,
+// even if it is not one of the first 6 arguments.  First pass puts p4 into a
+// register on both.  p9 ends up in a register in x86 only.  Second pass puts p1
+// in a register, does NOT put p7 in a register (since theres no room), then puts 
+// p8 in a register.
+void __vectorcall HVAAnywhere(struct HFA2 p1, int p2, int p3, float p4, int p5, int p6, struct HFA4 p7, struct HFA2 p8, float p9){}
+// X32: define x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
+// X64: define x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9) 
+
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index 9f375d780c94a..d24ea4dbab3dd 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -460,7 +460,7 @@ void test54() {
   test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
 }
 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
-// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
+// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}})
 
 typedef float __m512 __attribute__ ((__vector_size__ (64)));
 typedef struct {
@@ -529,7 +529,7 @@ void f63(__m512 *m, __builtin_va_list argList) {
 }
 
 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
-// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
+// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}})
 void f64_helper(__m512, ...);
 __m512 x64;
 void f64() {
diff --git a/test/CodeGenCXX/amdgcn-automatic-variable.cpp b/test/CodeGenCXX/amdgcn-automatic-variable.cpp
index aab720770d128..7df27c28e6d2c 100644
--- a/test/CodeGenCXX/amdgcn-automatic-variable.cpp
+++ b/test/CodeGenCXX/amdgcn-automatic-variable.cpp
@@ -3,9 +3,10 @@
 // CHECK-LABEL: define void @_Z5func1Pi(i32* %x)
 void func1(int *x) {
   // CHECK: %[[x_addr:.*]] = alloca i32*{{.*}}addrspace(5)
-  // CHECK: store i32* %x, i32* addrspace(5)* %[[x_addr]]
-  // CHECK: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[x_addr]]
-  // CHECK: store i32 1, i32* %[[r0]]
+  // CHECK: %[[r0:.*]] = addrspacecast i32* addrspace(5)* %[[x_addr]] to i32**
+  // CHECK: store i32* %x, i32** %[[r0]]
+  // CHECK: %[[r1:.*]] = load i32*, i32** %[[r0]]
+  // CHECK: store i32 1, i32* %[[r1]]
   *x = 1;
 }
 
@@ -70,3 +71,12 @@ void func3() {
   // CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]])
   A a;
 }
+
+// CHECK-LABEL: define void @_Z5func4i
+void func4(int x) {
+  // CHECK: %[[x_addr:.*]] = alloca i32, align 4, addrspace(5)
+  // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %[[x_addr]] to i32*
+  // CHECK: store i32 %x, i32* %[[r0]], align 4
+  // CHECK: call void @_Z5func1Pi(i32* %[[r0]])
+  func1(&x);
+}
diff --git a/test/CodeGenObjC/objc_copyStruct.m b/test/CodeGenObjC/objc_copyStruct.m
new file mode 100644
index 0000000000000..7fa0ad54f3aec
--- /dev/null
+++ b/test/CodeGenObjC/objc_copyStruct.m
@@ -0,0 +1,16 @@
+// RUN: %clang -target x86_64-unknown-windows-msvc -fobjc-runtime=ios -Wno-objc-root-class -S -o - -emit-llvm %s | FileCheck %s
+// RUN: %clang -target x86_64-apple-ios -fobjc-runtime=ios -Wno-objc-root-class -S -o - -emit-llvm %s | FileCheck %s
+
+struct S {
+  float f, g;
+};
+
+@interface I
+@property struct S s;
+@end
+
+@implementation I
+@end
+
+// CHECK: declare {{.*}}void @objc_copyStruct(i8*, i8*, i64, i1, i1)
+
diff --git a/test/CodeGenObjC/ubsan-nonnull-and-nullability.m b/test/CodeGenObjC/ubsan-nonnull-and-nullability.m
index b927f55cd4029..db6588dac310c 100644
--- a/test/CodeGenObjC/ubsan-nonnull-and-nullability.m
+++ b/test/CodeGenObjC/ubsan-nonnull-and-nullability.m
@@ -7,16 +7,26 @@
 // CHECK-LABEL: define nonnull i32* @f1
 __attribute__((returns_nonnull)) int *_Nonnull f1(int *_Nonnull p) {
   // CHECK: entry:
+  // CHECK-NEXT: [[SLOC_PTR:%.*]] = alloca i8*
   // CHECK-NEXT: [[ADDR:%.*]] = alloca i32*
+  // CHECK-NEXT: store i8* null, i8** [[SLOC_PTR]]
   // CHECK-NEXT: store i32* [[P:%.*]], i32** [[ADDR]]
+  // CHECK-NEXT: store {{.*}} [[SLOC_PTR]]
   // CHECK-NEXT: [[ARG:%.*]] = load i32*, i32** [[ADDR]]
+  // CHECK-NEXT: [[SLOC:%.*]] = load {{.*}} [[SLOC_PTR]]
+  // CHECK-NEXT: [[SLOC_NONNULL:%.*]] = icmp ne i8* [[SLOC]], null
+  // CHECK-NEXT: br i1 [[SLOC_NONNULL]], label %nullcheck
+  // 
+  // CHECK: nullcheck:
   // CHECK-NEXT: [[ICMP:%.*]] = icmp ne i32* [[ARG]], null, !nosanitize
   // CHECK-NEXT: br i1 [[ICMP]], label %[[CONT:.+]], label %[[HANDLE:[^,]+]]
   // CHECK: [[HANDLE]]:
-  // CHECK-NEXT:   call void @__ubsan_handle_nonnull_return_abort
+  // CHECK:      call void @__ubsan_handle_nonnull_return
   // CHECK-NEXT:   unreachable, !nosanitize
   // CHECK: [[CONT]]:
-  // CHECK-NEXT:   ret i32*
+  // CHECK-NEXT:   br label %no.nullcheck
+  // CHECK: no.nullcheck:
+  // CHECK-NEXT: ret i32* [[ARG]]
   return p;
 }
 
@@ -29,3 +39,23 @@ void call_f2() {
   // CHECK-NOT: call void @__ubsan_handle_nonnull_arg_abort
   f2((void *)0);
 }
+
+// If the return value isn't meant to be checked, make sure we don't check it.
+// CHECK-LABEL: define i32* @f3
+int *f3(int *p) {
+  // CHECK-NOT: return.sloc
+  // CHECK-NOT: call{{.*}}ubsan
+  return p;
+}
+
+// Check for a valid "return" source location, even when there is no return
+// statement, to avoid accidentally calling the runtime.
+
+// CHECK-LABEL: define nonnull i32* @f4
+__attribute__((returns_nonnull)) int *f4() {
+  // CHECK: store i8* null, i8** [[SLOC_PTR:%.*]]
+  // CHECK: [[SLOC:%.*]] = load {{.*}} [[SLOC_PTR]]
+  // CHECK: [[SLOC_NONNULL:%.*]] = icmp ne i8* [[SLOC]], null
+  // CHECK: br i1 [[SLOC_NONNULL]], label %nullcheck
+  // CHECK: nullcheck:
+}
diff --git a/test/CodeGenObjC/ubsan-nullability.m b/test/CodeGenObjC/ubsan-nullability.m
index 7f53ea6292eea..eeb24b03c868e 100644
--- a/test/CodeGenObjC/ubsan-nullability.m
+++ b/test/CodeGenObjC/ubsan-nullability.m
@@ -2,7 +2,7 @@
 // RUN: %clang_cc1 -x objective-c -emit-llvm -triple x86_64-apple-macosx10.10.0 -fsanitize=nullability-arg,nullability-assign,nullability-return -w %s -o - | FileCheck %s
 // RUN: %clang_cc1 -x objective-c++ -emit-llvm -triple x86_64-apple-macosx10.10.0 -fsanitize=nullability-arg,nullability-assign,nullability-return -w %s -o - | FileCheck %s
 
-// CHECK: [[NONNULL_RV_LOC1:@.*]] = private unnamed_addr global {{.*}} i32 109, i32 1 {{.*}} i32 100, i32 6
+// CHECK: [[NONNULL_RV_LOC1:@.*]] = private unnamed_addr global {{.*}} i32 100, i32 6
 // CHECK: [[NONNULL_ARG_LOC:@.*]] = private unnamed_addr global {{.*}} i32 204, i32 15 {{.*}} i32 190, i32 23
 // CHECK: [[NONNULL_ASSIGN1_LOC:@.*]] = private unnamed_addr global {{.*}} i32 305, i32 9
 // CHECK: [[NONNULL_ASSIGN2_LOC:@.*]] = private unnamed_addr global {{.*}} i32 405, i32 10
@@ -10,7 +10,7 @@
 // CHECK: [[NONNULL_INIT1_LOC:@.*]] = private unnamed_addr global {{.*}} i32 604, i32 25
 // CHECK: [[NONNULL_INIT2_LOC1:@.*]] = private unnamed_addr global {{.*}} i32 707, i32 26
 // CHECK: [[NONNULL_INIT2_LOC2:@.*]] = private unnamed_addr global {{.*}} i32 707, i32 29
-// CHECK: [[NONNULL_RV_LOC2:@.*]] = private unnamed_addr global {{.*}} i32 817, i32 1 {{.*}} i32 800, i32 6
+// CHECK: [[NONNULL_RV_LOC2:@.*]] = private unnamed_addr global {{.*}} i32 800, i32 6
 
 #define NULL ((void *)0)
 #define INULL ((int *)NULL)
@@ -19,14 +19,11 @@
 // CHECK-LABEL: define i32* @{{.*}}nonnull_retval1
 #line 100
 int *_Nonnull nonnull_retval1(int *p) {
-  // CHECK: br i1 true, label %[[NULL:.*]], label %[[NONULL:.*]], !nosanitize
-  // CHECK: [[NULL]]:
   // CHECK: [[ICMP:%.*]] = icmp ne i32* {{.*}}, null, !nosanitize
-  // CHECK-NEXT: br i1 [[ICMP]], {{.*}}, !nosanitize
+  // CHECK: br i1 [[ICMP]], {{.*}}, !nosanitize
   // CHECK: call void @__ubsan_handle_nullability_return{{.*}}[[NONNULL_RV_LOC1]]
   return p;
-  // CHECK: [[NONULL]]:
-  // CHECK-NEXT: ret i32*
+  // CHECK: ret i32*
 }
 
 #line 190
@@ -108,10 +105,13 @@ int *_Nonnull nonnull_retval2(int *_Nonnull arg1,  //< Test this.
   // CHECK-NEXT: [[DO_RV_CHECK_1:%.*]] = and i1 true, [[ARG1CMP]], !nosanitize
   // CHECK: [[ARG2CMP:%.*]] = icmp ne i32* %arg2, null, !nosanitize
   // CHECK-NEXT: [[DO_RV_CHECK_2:%.*]] = and i1 [[DO_RV_CHECK_1]], [[ARG2CMP]]
-  // CHECK: br i1 [[DO_RV_CHECK_2]], label %[[NULL:.*]], label %[[NONULL:.*]], !nosanitize
+  // CHECK: [[SLOC_PTR:%.*]] = load i8*, i8** %return.sloc.ptr
+  // CHECK-NEXT: [[SLOC_NONNULL:%.*]] = icmp ne i8* [[SLOC_PTR]], null
+  // CHECK-NEXT: [[DO_RV_CHECK_3:%.*]] = and i1 [[SLOC_NONNULL]], [[DO_RV_CHECK_2]]
+  // CHECK: br i1 [[DO_RV_CHECK_3]], label %[[NULL:.*]], label %[[NONULL:.*]], !nosanitize
   // CHECK: [[NULL]]:
   // CHECK-NEXT: [[ICMP:%.*]] = icmp ne i32* {{.*}}, null, !nosanitize
-  // CHECK-NEXT: br i1 [[ICMP]], {{.*}}, !nosanitize
+  // CHECK: br i1 [[ICMP]], {{.*}}, !nosanitize
   // CHECK: call void @__ubsan_handle_nullability_return{{.*}}[[NONNULL_RV_LOC2]]
   return arg1;
   // CHECK: [[NONULL]]:
@@ -129,10 +129,13 @@ int *_Nonnull nonnull_retval2(int *_Nonnull arg1,  //< Test this.
 +(int *_Nonnull) objc_clsmethod: (int *_Nonnull) arg1 {
   // CHECK: [[ARG1CMP:%.*]] = icmp ne i32* %arg1, null, !nosanitize
   // CHECK-NEXT: [[DO_RV_CHECK:%.*]] = and i1 true, [[ARG1CMP]]
-  // CHECK: br i1 [[DO_RV_CHECK]], label %[[NULL:.*]], label %[[NONULL:.*]], !nosanitize
+  // CHECK: [[SLOC_PTR:%.*]] = load i8*, i8** %return.sloc.ptr
+  // CHECK-NEXT: [[SLOC_NONNULL:%.*]] = icmp ne i8* [[SLOC_PTR]], null
+  // CHECK-NEXT: [[DO_RV_CHECK_2:%.*]] = and i1 [[SLOC_NONNULL]], [[DO_RV_CHECK]]
+  // CHECK: br i1 [[DO_RV_CHECK_2]], label %[[NULL:.*]], label %[[NONULL:.*]], !nosanitize
   // CHECK: [[NULL]]:
   // CHECK-NEXT: [[ICMP:%.*]] = icmp ne i32* {{.*}}, null, !nosanitize
-  // CHECK-NEXT: br i1 [[ICMP]], {{.*}}, !nosanitize
+  // CHECK: br i1 [[ICMP]], {{.*}}, !nosanitize
   // CHECK: call void @__ubsan_handle_nullability_return{{.*}}
   return arg1;
   // CHECK: [[NONULL]]:
@@ -143,10 +146,13 @@ int *_Nonnull nonnull_retval2(int *_Nonnull arg1,  //< Test this.
 -(int *_Nonnull) objc_method: (int *_Nonnull) arg1 {
   // CHECK: [[ARG1CMP:%.*]] = icmp ne i32* %arg1, null, !nosanitize
   // CHECK-NEXT: [[DO_RV_CHECK:%.*]] = and i1 true, [[ARG1CMP]]
-  // CHECK: br i1 [[DO_RV_CHECK]], label %[[NULL:.*]], label %[[NONULL:.*]], !nosanitize
+  // CHECK: [[SLOC_PTR:%.*]] = load i8*, i8** %return.sloc.ptr
+  // CHECK-NEXT: [[SLOC_NONNULL:%.*]] = icmp ne i8* [[SLOC_PTR]], null
+  // CHECK-NEXT: [[DO_RV_CHECK_2:%.*]] = and i1 [[SLOC_NONNULL]], [[DO_RV_CHECK]]
+  // CHECK: br i1 [[DO_RV_CHECK_2]], label %[[NULL:.*]], label %[[NONULL:.*]], !nosanitize
   // CHECK: [[NULL]]:
   // CHECK-NEXT: [[ICMP:%.*]] = icmp ne i32* {{.*}}, null, !nosanitize
-  // CHECK-NEXT: br i1 [[ICMP]], {{.*}}, !nosanitize
+  // CHECK: br i1 [[ICMP]], {{.*}}, !nosanitize
   // CHECK: call void @__ubsan_handle_nullability_return{{.*}}
   return arg1;
   // CHECK: [[NONULL]]:
diff --git a/test/CodeGenOpenCL/spir_version.cl b/test/CodeGenOpenCL/spir_version.cl
index 8a191282b3c78..ac5b8e8c7fa5e 100644
--- a/test/CodeGenOpenCL/spir_version.cl
+++ b/test/CodeGenOpenCL/spir_version.cl
@@ -10,17 +10,18 @@
 // RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-AMDGCN-CL20
 
 kernel void foo() {}
+kernel void bar() {}
 
-// CHECK-SPIR-CL10: !opencl.spir.version = !{[[SPIR:![0-9]+]]}
-// CHECK-SPIR-CL10: !opencl.ocl.version = !{[[OCL:![0-9]+]]}
-// CHECK-SPIR-CL10: [[SPIR]] = !{i32 1, i32 2}
-// CHECK-SPIR-CL10: [[OCL]] = !{i32 1, i32 0}
-// CHECK-SPIR-CL12: !opencl.spir.version = !{[[VER:![0-9]+]]}
-// CHECK-SPIR-CL12: !opencl.ocl.version = !{[[VER]]}
+// CHECK-SPIR-CL10-DAG: !opencl.spir.version = !{[[SPIR:![0-9]+]]}
+// CHECK-SPIR-CL10-DAG: !opencl.ocl.version = !{[[OCL:![0-9]+]]}
+// CHECK-SPIR-CL10-DAG: [[SPIR]] = !{i32 1, i32 2}
+// CHECK-SPIR-CL10-DAG: [[OCL]] = !{i32 1, i32 0}
+// CHECK-SPIR-CL12-DAG: !opencl.spir.version = !{[[VER:![0-9]+]]}
+// CHECK-SPIR-CL12-DAG: !opencl.ocl.version = !{[[VER]]}
 // CHECK-SPIR-CL12: [[VER]] = !{i32 1, i32 2}
 
-// CHECK-SPIR-CL20: !opencl.spir.version = !{[[VER:![0-9]+]]}
-// CHECK-SPIR-CL20: !opencl.ocl.version = !{[[VER]]}
+// CHECK-SPIR-CL20-DAG: !opencl.spir.version = !{[[VER:![0-9]+]]}
+// CHECK-SPIR-CL20-DAG: !opencl.ocl.version = !{[[VER]]}
 // CHECK-SPIR-CL20: [[VER]] = !{i32 2, i32 0}
 
 // CHECK-AMDGCN-CL10-NOT: !opencl.spir.version
diff --git a/test/Driver/ananas.c b/test/Driver/ananas.c
new file mode 100644
index 0000000000000..2a5b35ed6cec6
--- /dev/null
+++ b/test/Driver/ananas.c
@@ -0,0 +1,9 @@
+// RUN: %clang -no-canonical-prefixes -target x86_64-unknown-ananas -static %s \
+// RUN:   --sysroot=%S/Inputs/ananas-tree -### 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-STATIC %s
+// CHECK-STATIC: ld{{.*}}" "-Bstatic"
+// CHECK-STATIC: crt0.o
+// CHECK-STATIC: crti.o
+// CHECK-STATIC: crtbegin.o
+// CHECK-STATIC: crtend.o
+// CHECK-STATIC: crtn.o
diff --git a/test/Driver/autocomplete.c b/test/Driver/autocomplete.c
index 94649f2437381..f0bdcce5707cd 100644
--- a/test/Driver/autocomplete.c
+++ b/test/Driver/autocomplete.c
@@ -2,5 +2,37 @@
 // FSYN: -fsyntax-only
 // RUN: %clang --autocomplete=-s | FileCheck %s -check-prefix=STD
 // STD: -std={{.*}}-stdlib=
-// RUN: %clang --autocomplete=foo | not FileCheck %s -check-prefix=NONE
-// NONE: foo
+// RUN: %clang --autocomplete=foo | FileCheck %s -check-prefix=FOO
+// FOO-NOT: foo
+// RUN: %clang --autocomplete=-stdlib=,l | FileCheck %s -check-prefix=STDLIB
+// STDLIB: libc++ libstdc++
+// RUN: %clang --autocomplete=-stdlib=, | FileCheck %s -check-prefix=STDLIBALL
+// STDLIBALL: libc++ libstdc++ platform
+// RUN: %clang --autocomplete=-meabi,d | FileCheck %s -check-prefix=MEABI
+// MEABI: default
+// RUN: %clang --autocomplete=-meabi, | FileCheck %s -check-prefix=MEABIALL
+// MEABIALL: 4 5 default gnu
+// RUN: %clang --autocomplete=-cl-std=,CL2 | FileCheck %s -check-prefix=CLSTD
+// CLSTD: CL2.0
+// RUN: %clang --autocomplete=-cl-std=, | FileCheck %s -check-prefix=CLSTDALL
+// CLSTDALL: cl CL cl1.1 CL1.1 cl1.2 CL1.2 cl2.0 CL2.0
+// RUN: %clang --autocomplete=-fno-sanitize-coverage=,f | FileCheck %s -check-prefix=FNOSANICOVER
+// FNOSANICOVER: func
+// RUN: %clang --autocomplete=-fno-sanitize-coverage=, | FileCheck %s -check-prefix=FNOSANICOVERALL
+// FNOSANICOVERALL: 8bit-counters bb edge func indirect-calls inline-8bit-counters no-prune trace-bb trace-cmp trace-div trace-gep trace-pc trace-pc-guard
+// RUN: %clang --autocomplete=-ffp-contract=, | FileCheck %s -check-prefix=FFPALL
+// FFPALL: fast off on
+// RUN: %clang --autocomplete=-flto=, | FileCheck %s -check-prefix=FLTOALL
+// FLTOALL: full thin
+// RUN: %clang --autocomplete=-fveclib=, | FileCheck %s -check-prefix=FVECLIBALL
+// FVECLIBALL: Accelerate none SVML
+// RUN: %clang --autocomplete=-fshow-overloads=, | FileCheck %s -check-prefix=FSOVERALL
+// FSOVERALL: all best
+// RUN: %clang --autocomplete=-fvisibility=, | FileCheck %s -check-prefix=FVISIBILITYALL
+// FVISIBILITYALL: default hidden
+// RUN: %clang --autocomplete=-mfloat-abi=, | FileCheck %s -check-prefix=MFLOATABIALL
+// MFLOATABIALL: hard soft softfp
+// RUN: %clang --autocomplete=-mthread-model, | FileCheck %s -check-prefix=MTHREADMODELALL
+// MTHREADMODELALL: posix single
+// RUN: %clang --autocomplete=-mrelocation-model, | FileCheck %s -check-prefix=MRELOCMODELALL
+// MRELOCMODELALL: dynamic-no-pic pic ropi ropi-rwpi rwpi static
diff --git a/test/Driver/compress-noias.c b/test/Driver/compress-noias.c
new file mode 100644
index 0000000000000..d20cf366b8fca
--- /dev/null
+++ b/test/Driver/compress-noias.c
@@ -0,0 +1,37 @@
+// REQUIRES: zlib
+// REQUIRES: x86-registered-target
+
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -Wa,-compress-debug-sections -c %s 2>&1 | FileCheck -check-prefix CHECK-_COMPRESS_DEBUG_SECTIONS %s
+// CHECK-_COMPRESS_DEBUG_SECTIONS: "-compress-debug-sections"
+
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -Wa,--compress-debug-sections -c %s 2>&1 | FileCheck -check-prefix CHECK-__COMPRESS_DEBUG_SECTIONS %s
+// CHECK-__COMPRESS_DEBUG_SECTIONS: "--compress-debug-sections"
+
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -Wa,--compress-debug-sections -Wa,--nocompress-debug-sections -c %s 2>&1 | FileCheck -check-prefix CHECK-POSNEG %s
+// CHECK-POSNEG: "--compress-debug-sections"
+// CHECK-POSNEG: "--nocompress-debug-sections"
+
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -Wa,-compress-debug-sections -Wa,--compress-debug-sections -c %s 2>&1 | FileCheck -check-prefix CHECK-MULTIPLE %s
+// CHECK-MULTIPLE: "-compress-debug-sections"
+// CHECK-MULTIPLE: "--compress-debug-sections"
+
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ %s
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ %s
+// CHECK-OPT_GZ: "-compress-debug-sections"
+
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=none -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=none -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s
+// CHECK-OPT_GZ_EQ_NONE: "-compress-debug-sections=none"
+
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=zlib -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=zlib -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s
+// CHECK-OPT_GZ_EQ_ZLIB: "-compress-debug-sections=zlib"
+
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=zlib-gnu -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=zlib-gnu -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
+// CHECK-OPT_GZ_EQ_ZLIB_GNU: "-compress-debug-sections=zlib-gnu"
+
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=invalid -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s
+// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=invalid -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s
+// CHECK-OPT_GZ_EQ_INVALID: error: unsupported argument 'invalid' to option 'gz='
+
diff --git a/test/Driver/compress.c b/test/Driver/compress.c
index 6cdc6b72243ec..a00ce91225bd9 100644
--- a/test/Driver/compress.c
+++ b/test/Driver/compress.c
@@ -1,8 +1,36 @@
-// RUN: %clang -### -c -integrated-as -Wa,-compress-debug-sections %s 2>&1 | FileCheck --check-prefix=COMPRESS_DEBUG %s
-// RUN: %clang -### -c -integrated-as -Wa,--compress-debug-sections %s 2>&1 | FileCheck --check-prefix=COMPRESS_DEBUG %s
 // REQUIRES: zlib
 
-// COMPRESS_DEBUG: "-compress-debug-sections"
+// RUN: %clang -### -fintegrated-as -Wa,-compress-debug-sections -c %s 2>&1 | FileCheck -check-prefix CHECK-_COMPRESS_DEBUG_SECTIONS %s
+// CHECK-_COMPRESS_DEBUG_SECTIONS: "-compress-debug-sections"
+
+// RUN: %clang -### -fintegrated-as -Wa,--compress-debug-sections -c %s 2>&1 | FileCheck -check-prefix CHECK-__COMPRESS_DEBUG_SECTIONS %s
+// CHECK-__COMPRESS_DEBUG_SECTIONS: "--compress-debug-sections"
+
+// RUN: %clang -### -fintegrated-as -Wa,--compress-debug-sections -Wa,--nocompress-debug-sections -c %s 2>&1 | FileCheck -check-prefix CHECK-POSNEG %s
+// CHECK-POSNEG: "--compress-debug-sections"
+// CHECK-POSNEG: "--nocompress-debug-sections"
+
+// RUN: %clang -### -fintegrated-as -Wa,-compress-debug-sections -Wa,--compress-debug-sections -c %s 2>&1 | FileCheck -check-prefix CHECK-MULTIPLE %s
+// CHECK-MULTIPLE: "-compress-debug-sections"
+// CHECK-MULTIPLE: "--compress-debug-sections"
+
+// RUN: %clang -### -fintegrated-as -gz -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ %s
+// RUN: %clang -### -fintegrated-as -gz -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ %s
+// CHECK-OPT_GZ: "-compress-debug-sections"
+
+// RUN: %clang -### -fintegrated-as -gz=none -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s
+// RUN: %clang -### -fintegrated-as -gz=none -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s
+// CHECK-OPT_GZ_EQ_NONE: "-compress-debug-sections=none"
+
+// RUN: %clang -### -fintegrated-as -gz=zlib -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s
+// RUN: %clang -### -fintegrated-as -gz=zlib -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s
+// CHECK-OPT_GZ_EQ_ZLIB: "-compress-debug-sections=zlib"
+
+// RUN: %clang -### -fintegrated-as -gz=zlib-gnu -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
+// RUN: %clang -### -fintegrated-as -gz=zlib-gnu -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
+// CHECK-OPT_GZ_EQ_ZLIB_GNU: "-compress-debug-sections=zlib-gnu"
+
+// RUN: %clang -### -fintegrated-as -gz=invalid -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s
+// RUN: %clang -### -fintegrated-as -gz=invalid -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s
+// CHECK-OPT_GZ_EQ_INVALID: error: unsupported argument 'invalid' to option 'gz='
 
-// RUN: %clang -### -c -integrated-as -Wa,--compress-debug-sections -Wa,--nocompress-debug-sections %s 2>&1 | FileCheck --check-prefix=NOCOMPRESS_DEBUG %s
-// NOCOMPRESS_DEBUG-NOT: "-compress-debug-sections"
diff --git a/test/Driver/fsanitize-object-size.c b/test/Driver/fsanitize-object-size.c
new file mode 100644
index 0000000000000..b96221e0fd43e
--- /dev/null
+++ b/test/Driver/fsanitize-object-size.c
@@ -0,0 +1,31 @@
+// Check that the object size check is disabled at -O0.
+//
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=object-size %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=object-size %s -O0 -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=null,object-size %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-NO-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-NO-OSIZE-NO-WARNING
+
+// Check that the object size check is enabled at other optimization levels.
+//
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -O1 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=object-size -O2 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=object-size -O3 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=object-size -O4 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=object-size -Ofast %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=object-size -Os %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=object-size -Oz %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=object-size -Og %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+
+// Use of trap mode shouldn't affect the object size check.
+//
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -O1 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined-trap -O1 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined-trap -fsanitize-undefined-trap-on-error -O1 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HAS-OSIZE
+
+// CHECK-HAS-OSIZE-NOT: warning: the object size sanitizer
+// CHECK-HAS-OSIZE: -fsanitize={{[^ ]*}}object-size
+
+// CHECK-NO-OSIZE: warning: the object size sanitizer has no effect at -O0, but is explicitly enabled: -fsanitize={{[^ ]*}}object-size
+// CHECK-NO-OSIZE-NOT: -fsanitize={{[^ ]*}}object-size
+
+// CHECK-NO-OSIZE-NO-WARNING-NOT: -fsanitize={{[^ ]*}}object-size
diff --git a/test/Driver/fsanitize.c b/test/Driver/fsanitize.c
index f14459df6391f..0752ef6df0909 100644
--- a/test/Driver/fsanitize.c
+++ b/test/Driver/fsanitize.c
@@ -3,18 +3,18 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize-undefined-trap-on-error %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-TRAP
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined-trap -fsanitize-undefined-trap-on-error %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-TRAP
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-undefined-trap-on-error -fsanitize=undefined-trap %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-TRAP
-// CHECK-UNDEFINED-TRAP: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|object-size|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute|function),?){19}"}}
-// CHECK-UNDEFINED-TRAP: "-fsanitize-trap=alignment,array-bounds,bool,enum,float-cast-overflow,float-divide-by-zero,function,integer-divide-by-zero,nonnull-attribute,null,object-size,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,vla-bound"
-// CHECK-UNDEFINED-TRAP2: "-fsanitize-trap=alignment,array-bounds,bool,enum,float-cast-overflow,float-divide-by-zero,function,integer-divide-by-zero,nonnull-attribute,null,object-size,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,unreachable,vla-bound"
+// CHECK-UNDEFINED-TRAP: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute|function),?){18}"}}
+// CHECK-UNDEFINED-TRAP: "-fsanitize-trap=alignment,array-bounds,bool,enum,float-cast-overflow,float-divide-by-zero,function,integer-divide-by-zero,nonnull-attribute,null,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,vla-bound"
+// CHECK-UNDEFINED-TRAP2: "-fsanitize-trap=alignment,array-bounds,bool,enum,float-cast-overflow,float-divide-by-zero,function,integer-divide-by-zero,nonnull-attribute,null,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,unreachable,vla-bound"
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED
-// CHECK-UNDEFINED: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|function|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|vptr|object-size|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){20}"}}
+// CHECK-UNDEFINED: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|function|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|vptr|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){19}"}}
 
 // RUN: %clang -target x86_64-apple-darwin10 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-DARWIN
-// CHECK-UNDEFINED-DARWIN: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|object-size|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){18}"}}
+// CHECK-UNDEFINED-DARWIN: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){17}"}}
 
 // RUN: %clang -target i386-unknown-openbsd -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-OPENBSD
-// CHECK-UNDEFINED-OPENBSD: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|object-size|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){18}"}}
+// CHECK-UNDEFINED-OPENBSD: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){17}"}}
 
 // RUN: %clang -target i386-pc-win32 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-WIN --check-prefix=CHECK-UNDEFINED-WIN32
 // RUN: %clang -target i386-pc-win32 -fsanitize=undefined -x c++ %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-WIN --check-prefix=CHECK-UNDEFINED-WIN32 --check-prefix=CHECK-UNDEFINED-WIN-CXX
@@ -23,7 +23,7 @@
 // CHECK-UNDEFINED-WIN32: "--dependent-lib={{[^"]*}}ubsan_standalone-i386.lib"
 // CHECK-UNDEFINED-WIN64: "--dependent-lib={{[^"]*}}ubsan_standalone-x86_64.lib"
 // CHECK-UNDEFINED-WIN-CXX: "--dependent-lib={{[^"]*}}ubsan_standalone_cxx{{[^"]*}}.lib"
-// CHECK-UNDEFINED-WIN-SAME: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|object-size|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){18}"}}
+// CHECK-UNDEFINED-WIN-SAME: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){17}"}}
 
 // RUN: %clang -target i386-pc-win32 -fsanitize-coverage=bb %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-COVERAGE-WIN32
 // CHECK-COVERAGE-WIN32: "--dependent-lib={{[^"]*}}ubsan_standalone-i386.lib"
@@ -43,7 +43,7 @@
 // CHECK-FNO-SANITIZE-ALL: "-fsanitize=thread"
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=thread,undefined -fno-sanitize=thread -fno-sanitize=float-cast-overflow,vptr,bool,enum %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PARTIAL-UNDEFINED
-// CHECK-PARTIAL-UNDEFINED: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|function|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|object-size|pointer-overflow|array-bounds|returns-nonnull-attribute|nonnull-attribute),?){16}"}}
+// CHECK-PARTIAL-UNDEFINED: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|function|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|array-bounds|returns-nonnull-attribute|nonnull-attribute),?){15}"}}
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=shift -fno-sanitize=shift-base %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FSANITIZE-SHIFT-PARTIAL
 // CHECK-FSANITIZE-SHIFT-PARTIAL: "-fsanitize=shift-exponent"
@@ -217,11 +217,11 @@
 // RUN: %clang -target x86_64-linux-gnu %s -fsanitize=undefined -fno-sanitize-recover=undefined -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-RECOVER-UBSAN
 // RUN: %clang -target x86_64-linux-gnu %s -fsanitize=undefined -fno-sanitize-recover=all -fsanitize-recover=thread -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-RECOVER-UBSAN
 // RUN: %clang -target x86_64-linux-gnu %s -fsanitize=undefined -fsanitize-recover=all -fno-sanitize-recover=undefined -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-RECOVER-UBSAN
-// CHECK-RECOVER-UBSAN: "-fsanitize-recover={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|function|shift-base|shift-exponent|vla-bound|alignment|null|vptr|object-size|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){18}"}}
+// CHECK-RECOVER-UBSAN: "-fsanitize-recover={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|function|shift-base|shift-exponent|vla-bound|alignment|null|vptr|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){17}"}}
 // CHECK-NO-RECOVER-UBSAN-NOT: sanitize-recover
 
 // RUN: %clang -target x86_64-linux-gnu %s -fsanitize=undefined -fno-sanitize-recover=all -fsanitize-recover=object-size,shift-base -### 2>&1 | FileCheck %s --check-prefix=CHECK-PARTIAL-RECOVER
-// CHECK-PARTIAL-RECOVER: "-fsanitize-recover={{((object-size|shift-base),?){2}"}}
+// CHECK-PARTIAL-RECOVER: "-fsanitize-recover={{((shift-base),?){1}"}}
 
 // RUN: %clang -target x86_64-linux-gnu %s -fsanitize=address -fsanitize-recover=all -### 2>&1 | FileCheck %s --check-prefix=CHECK-RECOVER-ASAN
 // CHECK-RECOVER-ASAN: "-fsanitize-recover=address"
diff --git a/test/Driver/nozlibcompress.c b/test/Driver/nozlibcompress.c
index 9986c85d79aea..41e1794ad9c39 100644
--- a/test/Driver/nozlibcompress.c
+++ b/test/Driver/nozlibcompress.c
@@ -1,6 +1,7 @@
-// RUN: %clang -c %s -Wa,--compress-debug-sections 2>&1 | FileCheck %s
-// RUN: %clang -c %s -Wa,--compress-debug-sections -Wa,--nocompress-debug-sections 2>&1 | FileCheck --allow-empty --check-prefix=NOWARN %s
 // REQUIRES: nozlib
 
-// CHECK: warning: cannot compress debug sections (zlib not installed)
-// NOWARN-NOT: warning: cannot compress debug sections (zlib not installed)
+// RUN: %clang -### -fintegrated-as -gz -c %s 2>&1 | FileCheck %s -check-prefix CHECK-WARN
+// RUN: %clang -### -fintegrated-as -gz=none -c %s 2>&1 | FileCheck -allow-empty -check-prefix CHECK-NOWARN %s
+
+// CHECK-WARN: warning: cannot compress debug sections (zlib not installed)
+// CHECK-NOWARN-NOT: warning: cannot compress debug sections (zlib not installed)
diff --git a/test/Driver/wasm-toolchain.c b/test/Driver/wasm-toolchain.c
index 3be60df926729..8debc02870077 100644
--- a/test/Driver/wasm-toolchain.c
+++ b/test/Driver/wasm-toolchain.c
@@ -27,18 +27,18 @@
 
 // RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s 2>&1 | FileCheck -check-prefix=LINK %s
 // LINK: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// LINK: lld{{.*}}" "-flavor" "wasm" "-L/foo/lib32" "crt1.o" "crti.o" "[[temp]]" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
+// LINK: lld{{.*}}" "-flavor" "wasm" "-L/foo/lib" "crt1.o" "crti.o" "[[temp]]" "-allow-undefined-file" "wasm.syms" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
 
 // A basic C link command-line with optimization. WebAssembly is somewhat
 // special in enabling --gc-sections by default.
 
 // RUN: %clang -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s 2>&1 | FileCheck -check-prefix=LINK_OPT %s
 // LINK_OPT: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// LINK_OPT: lld{{.*}}" "-flavor" "wasm" "--gc-sections" "-L/foo/lib32" "crt1.o" "crti.o" "[[temp]]" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
+// LINK_OPT: lld{{.*}}" "-flavor" "wasm" "--gc-sections" "-L/foo/lib" "crt1.o" "crti.o" "[[temp]]" "-allow-undefined-file" "wasm.syms" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
 
 // Ditto, but ensure that a user --no-gc-sections comes after the
 // default --gc-sections.
 
 // RUN: %clang -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo -Wl,--no-gc-sections %s 2>&1 | FileCheck -check-prefix=NO_GC_SECTIONS %s
 // NO_GC_SECTIONS: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// NO_GC_SECTIONS: lld{{.*}}" "-flavor" "wasm" "--gc-sections" "-L/foo/lib32" "crt1.o" "crti.o" "--no-gc-sections" "[[temp]]" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
+// NO_GC_SECTIONS: lld{{.*}}" "-flavor" "wasm" "--gc-sections" "-L/foo/lib" "crt1.o" "crti.o" "--no-gc-sections" "[[temp]]" "-allow-undefined-file" "wasm.syms" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
diff --git a/test/FixIt/fixit-format-darwin.m b/test/FixIt/fixit-format-darwin.m
index bfc71291a5c3a..077cc0cf21bcc 100644
--- a/test/FixIt/fixit-format-darwin.m
+++ b/test/FixIt/fixit-format-darwin.m
@@ -57,3 +57,20 @@ void test() {
   Log3("test 7: %s", getNSInteger(), getNSUInteger());
   // CHECK: Log3("test 7: %ld", (long)getNSInteger(), (unsigned long)getNSUInteger());
 }
+
+#define Outer1(...) \
+do { \
+  printf(__VA_ARGS__); \
+} while (0)
+
+#define Outer2(...) \
+do { \
+  Outer1(__VA_ARGS__); Outer1(__VA_ARGS__); \
+} while (0)
+
+void bug33447() {
+  Outer2("test 8: %s", getNSInteger());  
+  // CHECK: Outer2("test 8: %ld", (long)getNSInteger());
+  Outer2("test 9: %s %s", getNSInteger(), getNSInteger());
+  // CHECK: Outer2("test 9: %ld %ld", (long)getNSInteger(), (long)getNSInteger());
+}
diff --git a/test/Frontend/Inputs/optimization-remark-with-hotness-sample.proftext b/test/Frontend/Inputs/optimization-remark-with-hotness-sample.proftext
new file mode 100644
index 0000000000000..aeea583de4d4f
--- /dev/null
+++ b/test/Frontend/Inputs/optimization-remark-with-hotness-sample.proftext
@@ -0,0 +1,7 @@
+foo:0:0
+ 0: 0
+bar:29:29
+ 6: foo:0
+main:0:0
+ 0: 0 bar:0
+
diff --git a/test/Frontend/optimization-remark-with-hotness.c b/test/Frontend/optimization-remark-with-hotness.c
index 708f5ec8d4bf9..30ead64b8eb54 100644
--- a/test/Frontend/optimization-remark-with-hotness.c
+++ b/test/Frontend/optimization-remark-with-hotness.c
@@ -1,11 +1,21 @@
+// Generate instrumentation and sampling profile data.
 // RUN: llvm-profdata merge \
-// RUN:     %S/Inputs/optimization-remark-with-hotness.proftext   \
+// RUN:     %S/Inputs/optimization-remark-with-hotness.proftext \
 // RUN:     -o %t.profdata
+// RUN: llvm-profdata merge -sample \
+// RUN:     %S/Inputs/optimization-remark-with-hotness-sample.proftext \
+// RUN:     -o %t-sample.profdata
+//
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \
 // RUN:     optimization-remark-with-hotness.c %s -emit-llvm-only \
 // RUN:     -fprofile-instrument-use-path=%t.profdata -Rpass=inline \
 // RUN:     -Rpass-analysis=inline -Rpass-missed=inline \
 // RUN:     -fdiagnostics-show-hotness -verify
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \
+// RUN:     optimization-remark-with-hotness.c %s -emit-llvm-only \
+// RUN:     -fprofile-sample-use=%t-sample.profdata -Rpass=inline \
+// RUN:     -Rpass-analysis=inline -Rpass-missed=inline \
+// RUN:     -fdiagnostics-show-hotness -verify
 // The clang version of the previous test.
 // RUN: %clang -target x86_64-apple-macosx10.9 %s -c -emit-llvm -o /dev/null \
 // RUN:     -fprofile-instr-use=%t.profdata -Rpass=inline \
diff --git a/test/Frontend/pp-only-no-editor-placeholders.c b/test/Frontend/pp-only-no-editor-placeholders.c
new file mode 100644
index 0000000000000..515d9893fd762
--- /dev/null
+++ b/test/Frontend/pp-only-no-editor-placeholders.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -E -verify -o - %s | FileCheck %s
+// expected-no-diagnostics
+
+<#placeholder#>; // CHECK: <#placeholder#>;
diff --git a/test/Import/indirect-struct-member-access/Inputs/S.c b/test/Import/indirect-struct-member-access/Inputs/S.c
new file mode 100644
index 0000000000000..b0876d27df441
--- /dev/null
+++ b/test/Import/indirect-struct-member-access/Inputs/S.c
@@ -0,0 +1,3 @@
+struct S {
+  int a;
+};
diff --git a/test/Import/indirect-struct-member-access/test.c b/test/Import/indirect-struct-member-access/test.c
new file mode 100644
index 0000000000000..cbf7e65ec1cca
--- /dev/null
+++ b/test/Import/indirect-struct-member-access/test.c
@@ -0,0 +1,4 @@
+// RUN: clang-import-test -import %S/Inputs/S.c -expression %s
+void expr(struct S *MyS) {
+  MyS->a = 3;
+}
diff --git a/test/Index/Core/index-source.cpp b/test/Index/Core/index-source.cpp
index 10f2d8f777475..6d20fdd48e50f 100644
--- a/test/Index/Core/index-source.cpp
+++ b/test/Index/Core/index-source.cpp
@@ -134,6 +134,9 @@ class PseudoOverridesInSpecializations {
 
   template<typename U> struct InnerTemplate { };
   template<typename U> struct InnerTemplate <U*> { };
+
+  template<typename U>
+  class InnerClass { };
 };
 
 template<>
@@ -195,7 +198,21 @@ class PseudoOverridesInSpecializations<double, int> {
 // CHECK-NEXT: RelChild
 // CHECK-NEXT: RelSpecialization | InnerTemplate | c:@ST>2#T#T@PseudoOverridesInSpecializations@ST>1#T@InnerTemplate
   template<typename U> struct InnerTemplate <U*> { };
+
+  template<typename U>
+  class InnerClass;
+// CHECK: [[@LINE-1]]:9 | class(Gen)/C++ | InnerClass | c:@S@PseudoOverridesInSpecializations>#d#I@ST>1#T@InnerClass | <no-cgname> | Ref,RelCont,RelSpecialization | rel: 2
+// CHECK-NEXT: RelCont
+// CHECK-NEXT: RelSpecialization | InnerClass | c:@ST>2#T#T@PseudoOverridesInSpecializations@ST>1#T@InnerClass
+};
+
+template<typename U>
+class PseudoOverridesInSpecializations<double, int>::InnerClass {
 };
+// CHECK: [[@LINE-2]]:54 | class(Gen)/C++ | InnerClass | c:@S@PseudoOverridesInSpecializations>#d#I@ST>1#T@InnerClass | <no-cgname> | Def,RelChild | rel: 1
+// CHECK-NEXT: RelChild
+// CHECK: [[@LINE-4]]:7 | class(Gen)/C++ | PseudoOverridesInSpecializations | c:@ST>2#T#T@PseudoOverridesInSpecializations | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK-NEXT: RelCont
 
 template<typename S>
 class PseudoOverridesInSpecializations<float, S> {
@@ -265,7 +282,9 @@ class SpecializationDecl { };
 
 template<>
 class SpecializationDecl<int>;
-// CHECK: [[@LINE-1]]:7 | class(Gen)/C++ | SpecializationDecl | c:@ST>1#T@SpecializationDecl | <no-cgname> | Ref | rel: 0
+// CHECK: [[@LINE-1]]:7 | class(Gen,TS)/C++ | SpecializationDecl | c:@S@SpecializationDecl>#I | <no-cgname> | Decl,RelSpecialization | rel: 1
+// CHECK-NEXT: RelSpecialization | SpecializationDecl | c:@ST>1#T@SpecializationDecl
+// CHECK: [[@LINE-3]]:7 | class(Gen)/C++ | SpecializationDecl | c:@ST>1#T@SpecializationDecl | <no-cgname> | Ref | rel: 0
 
 template<>
 class SpecializationDecl<int> { };
@@ -275,8 +294,10 @@ class SpecializationDecl<int> { };
 
 template<typename T>
 class PartialSpecilizationClass<Cls, T>;
-// CHECK: [[@LINE-1]]:7 | class(Gen)/C++ | PartialSpecilizationClass | c:@ST>2#T#T@PartialSpecilizationClass | <no-cgname> | Ref | rel: 0
-// CHECK-NEXT: [[@LINE-2]]:33 | class/C++ | Cls | c:@S@Cls | <no-cgname> | Ref | rel: 0
+// CHECK: [[@LINE-1]]:7 | class(Gen,TPS)/C++ | PartialSpecilizationClass | c:@SP>1#T@PartialSpecilizationClass>#$@S@Cls#t0.0 | <no-cgname> | Decl,RelSpecialization | rel: 1
+// CHECK-NEXT: RelSpecialization | PartialSpecilizationClass | c:@ST>2#T#T@PartialSpecilizationClass
+// CHECK: [[@LINE-3]]:7 | class(Gen)/C++ | PartialSpecilizationClass | c:@ST>2#T#T@PartialSpecilizationClass | <no-cgname> | Ref | rel: 0
+// CHECK-NEXT: [[@LINE-4]]:33 | class/C++ | Cls | c:@S@Cls | <no-cgname> | Ref | rel: 0
 
 template<>
 class PartialSpecilizationClass<Cls, Cls> : Cls { };
diff --git a/test/Index/singe-file-parse.m b/test/Index/singe-file-parse.m
deleted file mode 100644
index d13915b30c5f6..0000000000000
--- a/test/Index/singe-file-parse.m
+++ /dev/null
@@ -1,11 +0,0 @@
-// RUN: c-index-test -single-file-parse %s | FileCheck %s
-
-#include <stdint.h>
-
-// CHECK-NOT: TypedefDecl=intptr_t
-
-// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=MyCls
-@interface MyCls
-// CHECK: [[@LINE+1]]:8: ObjCInstanceMethodDecl=some_meth
--(void)some_meth;
-@end
diff --git a/test/Index/single-file-parse.m b/test/Index/single-file-parse.m
new file mode 100644
index 0000000000000..f75b9bd0ee544
--- /dev/null
+++ b/test/Index/single-file-parse.m
@@ -0,0 +1,121 @@
+// RUN: c-index-test -single-file-parse %s | FileCheck %s
+
+#include <stdint.h>
+
+// CHECK-NOT: TypedefDecl=intptr_t
+
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=MyCls
+@interface MyCls
+// CHECK: [[@LINE+1]]:8: ObjCInstanceMethodDecl=some_meth
+-(void)some_meth;
+@end
+
+#if 1
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test1
+@interface Test1 @end
+#else
+// CHECK-NOT: [[@LINE+1]]:12:
+@interface Test2 @end
+#endif
+
+#if 0
+// CHECK-NOT: [[@LINE+1]]:12:
+@interface Test3 @end
+#else
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test4
+@interface Test4 @end
+#endif
+
+#if SOMETHING_NOT_DEFINED
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test5
+@interface Test5 @end
+#else
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test6
+@interface Test6 @end
+#endif
+
+#define SOMETHING_DEFINED 1
+#if SOMETHING_DEFINED
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test7
+@interface Test7 @end
+#else
+// CHECK-NOT: [[@LINE+1]]:12:
+@interface Test8 @end
+#endif
+
+#if defined(SOMETHING_NOT_DEFINED)
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test9
+@interface Test9 @end
+#else
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test10
+@interface Test10 @end
+#endif
+
+#if defined(SOMETHING_DEFINED)
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test11
+@interface Test11 @end
+#else
+// CHECK-NOT: [[@LINE+1]]:12:
+@interface Test12 @end
+#endif
+
+#if SOMETHING_NOT_DEFINED1
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test13
+@interface Test13 @end
+#elif SOMETHING_NOT_DEFINED2
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test14
+@interface Test14 @end
+#else
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test15
+@interface Test15 @end
+#endif
+
+#ifdef SOMETHING_NOT_DEFINED
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test19
+@interface Test19 @end
+#else
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test20
+@interface Test20 @end
+#endif
+
+#ifdef SOMETHING_DEFINED
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test21
+@interface Test21 @end
+#else
+// CHECK-NOT: [[@LINE+1]]:12:
+@interface Test22 @end
+#endif
+
+#ifndef SOMETHING_NOT_DEFINED
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test23
+@interface Test23 @end
+#else
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test24
+@interface Test24 @end
+#endif
+
+#ifndef SOMETHING_DEFINED
+// CHECK-NOT: [[@LINE+1]]:12:
+@interface Test25 @end
+#else
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test26
+@interface Test26 @end
+#endif
+
+#if 1 < SOMETHING_NOT_DEFINED
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test27
+@interface Test27 @end
+#else
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test28
+@interface Test28 @end
+#endif
+
+#if SOMETHING_NOT_DEFINED
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test29
+@interface Test29 @end
+#endif
+
+#ifdef SOMETHING_NOT_DEFINED
+// CHECK: [[@LINE+1]]:12: ObjCInterfaceDecl=Test30
+@interface Test30 @end
+#endif
diff --git a/test/Misc/Inputs/module.modulemap b/test/Misc/Inputs/module.modulemap
new file mode 100644
index 0000000000000..a8ecb09390a25
--- /dev/null
+++ b/test/Misc/Inputs/module.modulemap
@@ -0,0 +1 @@
+module X {}
diff --git a/test/Misc/ast-dump-decl.c b/test/Misc/ast-dump-decl.c
index 45edea26b4f84..313c808c4ac8e 100644
--- a/test/Misc/ast-dump-decl.c
+++ b/test/Misc/ast-dump-decl.c
@@ -1,8 +1,13 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -ast-dump -ast-dump-filter Test %s | FileCheck -check-prefix CHECK -strict-whitespace %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -ast-dump %s | FileCheck -check-prefix CHECK-TU -strict-whitespace %s
+// RUN: %clang_cc1 -fmodules -fmodules-local-submodule-visibility -fmodule-name=X -triple x86_64-unknown-unknown -fmodule-map-file=%S/Inputs/module.modulemap -ast-dump -ast-dump-filter Test %s -DMODULES | FileCheck -check-prefix CHECK -check-prefix CHECK-MODULES -strict-whitespace %s
 
 int TestLocation;
-// CHECK: VarDecl 0x{{[^ ]*}} <{{.*}}:4:1, col:5> col:5 TestLocation
+// CHECK: VarDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE-1]]:1, col:5> col:5 TestLocation
+
+#ifdef MODULES
+#pragma clang module begin X
+#endif
 
 struct TestIndent {
   int x;
@@ -33,7 +38,7 @@ typedef int TestTypedefDecl;
 // CHECK:      TypedefDecl{{.*}} TestTypedefDecl 'int'
 
 __module_private__ typedef int TestTypedefDeclPrivate;
-// CHECK:      TypedefDecl{{.*}} TestTypedefDeclPrivate 'int' __module_private__
+// CHECK-MODULE:      TypedefDecl{{.*}} TestTypedefDeclPrivate 'int' __module_private__
 
 enum TestEnumDecl {
   testEnumDecl
@@ -53,7 +58,7 @@ enum TestEnumDeclForward;
 // CHECK:      EnumDecl{{.*}} TestEnumDeclForward
 
 __module_private__ enum TestEnumDeclPrivate;
-// CHECK:      EnumDecl{{.*}} TestEnumDeclPrivate __module_private__
+// CHECK-MODULE:      EnumDecl{{.*}} TestEnumDeclPrivate __module_private__
 
 struct TestRecordDecl {
   int i;
@@ -83,7 +88,7 @@ struct TestRecordDeclForward;
 // CHECK:      RecordDecl{{.*}} struct TestRecordDeclForward
 
 __module_private__ struct TestRecordDeclPrivate;
-// CHECK:      RecordDecl{{.*}} struct TestRecordDeclPrivate __module_private__
+// CHECK-MODULE:      RecordDecl{{.*}} struct TestRecordDeclPrivate __module_private__
 
 enum testEnumConstantDecl {
   TestEnumConstantDecl,
@@ -136,7 +141,7 @@ struct testFieldDecl {
 // CHECK:      FieldDecl{{.*}} TestFieldDecl 'int'
 // CHECK:      FieldDecl{{.*}} TestFieldDeclWidth 'int'
 // CHECK-NEXT:   IntegerLiteral
-// CHECK:      FieldDecl{{.*}} TestFieldDeclPrivate 'int' __module_private__
+// CHECK-MODULE:      FieldDecl{{.*}} TestFieldDeclPrivate 'int' __module_private__
 
 int TestVarDecl;
 // CHECK:      VarDecl{{.*}} TestVarDecl 'int'
@@ -148,7 +153,7 @@ __thread int TestVarDeclThread;
 // CHECK:      VarDecl{{.*}} TestVarDeclThread 'int' tls{{$}}
 
 __module_private__ int TestVarDeclPrivate;
-// CHECK:      VarDecl{{.*}} TestVarDeclPrivate 'int' __module_private__
+// CHECK-MODULE:      VarDecl{{.*}} TestVarDeclPrivate 'int' __module_private__
 
 int TestVarDeclInit = 0;
 // CHECK:      VarDecl{{.*}} TestVarDeclInit 'int'
@@ -156,3 +161,8 @@ int TestVarDeclInit = 0;
 
 void testParmVarDecl(int TestParmVarDecl);
 // CHECK: ParmVarDecl{{.*}} TestParmVarDecl 'int'
+
+#ifdef MODULES
+#pragma clang module end
+#endif
+
diff --git a/test/Misc/ast-dump-decl.cpp b/test/Misc/ast-dump-decl.cpp
index e1cdeb0995fa5..7370f46c5b322 100644
--- a/test/Misc/ast-dump-decl.cpp
+++ b/test/Misc/ast-dump-decl.cpp
@@ -95,17 +95,12 @@ class TestCXXRecordDeclPack : public T... {
 thread_local int TestThreadLocalInt;
 // CHECK: TestThreadLocalInt {{.*}} tls_dynamic
 
-__module_private__ class TestCXXRecordDeclPrivate;
-// CHECK: CXXRecordDecl{{.*}} class TestCXXRecordDeclPrivate __module_private__
-
 class testCXXMethodDecl {
-  __module_private__ void TestCXXMethodDeclPrivate();
   virtual void TestCXXMethodDeclPure() = 0;
   void TestCXXMethodDeclDelete() = delete;
   void TestCXXMethodDeclThrow() throw();
   void TestCXXMethodDeclThrowType() throw(int);
 };
-// CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclPrivate 'void (void)' __module_private__
 // CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclPure 'void (void)' virtual pure
 // CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclDelete 'void (void)' delete
 // CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclThrow 'void (void) throw()'
diff --git a/test/Misc/cc1as-compress.s b/test/Misc/cc1as-compress.s
new file mode 100644
index 0000000000000..beba227138d87
--- /dev/null
+++ b/test/Misc/cc1as-compress.s
@@ -0,0 +1,8 @@
+// REQUIRES: zlib
+// REQUIRES: x86-registered-target
+
+// RUN: %clang -cc1as -triple i686 --compress-debug-sections -filetype asm %s -o /dev/null 2>&1 | FileCheck -allow-empty %s
+// RUN: %clang -cc1as -triple i686 -compress-debug-sections -filetype asm %s -o /dev/null 2>&1 | FileCheck -allow-empty %s
+
+// CHECK-NOT: error: unknown argument:
+
diff --git a/test/Modules/Inputs/preprocess/file.h b/test/Modules/Inputs/preprocess/file.h
index 808ade5768b15..84cf22a337406 100644
--- a/test/Modules/Inputs/preprocess/file.h
+++ b/test/Modules/Inputs/preprocess/file.h
@@ -1,3 +1,9 @@
+#include "other.h"
+
+#ifndef FILE_H
+#define FILE_H
 struct __FILE;
 #include "fwd.h"
 typedef struct __FILE FILE;
+typedef foo bar;
+#endif
diff --git a/test/Modules/Inputs/preprocess/fwd.h b/test/Modules/Inputs/preprocess/fwd.h
index 4a19c6d0c057c..f6de1800c0000 100644
--- a/test/Modules/Inputs/preprocess/fwd.h
+++ b/test/Modules/Inputs/preprocess/fwd.h
@@ -1 +1,2 @@
+typedef struct foo foo;
 struct __FILE;
diff --git a/test/Modules/Inputs/preprocess/module.modulemap b/test/Modules/Inputs/preprocess/module.modulemap
index f700db03beac4..5be2e5c4ff98a 100644
--- a/test/Modules/Inputs/preprocess/module.modulemap
+++ b/test/Modules/Inputs/preprocess/module.modulemap
@@ -1,5 +1,5 @@
 module fwd { header "fwd.h" export * }
-module file { header "file.h" header "file2.h" export * }
+module file { header "file.h" header "file2.h" header "other.h" export * }
 module nested {
   module a { header "a.h" }
   module b { header "b.h" }
diff --git a/test/Modules/Inputs/preprocess/other.h b/test/Modules/Inputs/preprocess/other.h
new file mode 100644
index 0000000000000..84c4d1d5eb235
--- /dev/null
+++ b/test/Modules/Inputs/preprocess/other.h
@@ -0,0 +1 @@
+// other.h: empty
diff --git a/test/Modules/const-var-init-update.cpp b/test/Modules/const-var-init-update.cpp
new file mode 100644
index 0000000000000..61080eb83917a
--- /dev/null
+++ b/test/Modules/const-var-init-update.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=c++1z -fmodules %s -verify
+// expected-no-diagnostics
+
+#pragma clang module build std
+module std { module limits {} module other {} }
+#pragma clang module contents
+#pragma clang module begin std.limits
+template<typename T> struct numeric_limits {
+  static constexpr T __max = 5;
+  static constexpr T max() { return __max; }
+};
+#pragma clang module end
+#pragma clang module begin std.other
+inline void f() { numeric_limits<int> nl; }
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module build module_b
+module module_b {}
+#pragma clang module contents
+#pragma clang module begin module_b
+#pragma clang module import std.limits
+constexpr int a = numeric_limits<int>::max();
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module import std.limits
+#pragma clang module import module_b
+constexpr int b = a;
+static_assert(b == 5);
diff --git a/test/Modules/interface-visibility.m b/test/Modules/interface-visibility.m
new file mode 100644
index 0000000000000..2bb124ce09569
--- /dev/null
+++ b/test/Modules/interface-visibility.m
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -fmodules -fobjc-arc -x objective-c-module-map %s -fmodule-name=Foo -verify
+
+module Foo {}
+
+#pragma clang module contents
+#pragma clang module begin Foo
+
+// expected-no-diagnostics
+
+#pragma clang module build Foundation
+module Foundation {}
+#pragma clang module contents
+#pragma clang module begin Foundation
+@interface NSIndexSet
+@end
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module import Foundation
+
+@interface NSIndexSet (Testing)
+- (int)foo;
+@end
+
+static inline int test(NSIndexSet *obj) {
+  return [obj foo];
+}
+
+#pragma clang module end
diff --git a/test/Modules/preprocess-module.cpp b/test/Modules/preprocess-module.cpp
index 000290fc971bb..b0cbac18e7807 100644
--- a/test/Modules/preprocess-module.cpp
+++ b/test/Modules/preprocess-module.cpp
@@ -29,15 +29,15 @@
 // RUN: %clang_cc1 -fmodules -fmodule-file=%t/rewrite.pcm %s -I%t -verify -fno-modules-error-recovery -DREWRITE -DINCLUDE -I%S/Inputs/preprocess
 
 // Now try building the module when the header files are missing.
-// RUN: cp %S/Inputs/preprocess/fwd.h %S/Inputs/preprocess/file.h %S/Inputs/preprocess/file2.h %S/Inputs/preprocess/module.modulemap %t
+// RUN: cp %S/Inputs/preprocess/fwd.h %S/Inputs/preprocess/file.h %S/Inputs/preprocess/file2.h %S/Inputs/preprocess/other.h %S/Inputs/preprocess/module.modulemap %t
 // RUN: %clang_cc1 -fmodules -fmodule-name=file -fmodule-file=%t/fwd.pcm -I%t -x c++-module-map %t/module.modulemap -E -frewrite-includes -o %t/copy.ii
-// RUN: rm %t/fwd.h %t/file.h %t/file2.h %t/module.modulemap
+// RUN: rm %t/fwd.h %t/file.h %t/file2.h %t/other.h %t/module.modulemap
 // RUN: %clang_cc1 -fmodules -fmodule-name=file -fmodule-file=%t/fwd.pcm -x c++-module-map-cpp-output %t/copy.ii -emit-module -o %t/copy.pcm
 
 // Check that our module contains correct mapping information for the headers.
-// RUN: cp %S/Inputs/preprocess/fwd.h %S/Inputs/preprocess/file.h %S/Inputs/preprocess/file2.h %S/Inputs/preprocess/module.modulemap %t
+// RUN: cp %S/Inputs/preprocess/fwd.h %S/Inputs/preprocess/file.h %S/Inputs/preprocess/file2.h %S/Inputs/preprocess/other.h %S/Inputs/preprocess/module.modulemap %t
 // RUN: %clang_cc1 -fmodules -fmodule-file=%t/copy.pcm %s -I%t -verify -fno-modules-error-recovery -DCOPY -DINCLUDE
-// RUN: rm %t/fwd.h %t/file.h %t/file2.h %t/module.modulemap
+// RUN: rm %t/fwd.h %t/file.h %t/file2.h %t/other.h %t/module.modulemap
 
 // Check that we can preprocess from a .pcm file and that we get the same result as preprocessing from the original sources.
 // RUN: %clang_cc1 -fmodules -fmodule-name=file -fmodule-file=%t/fwd.pcm -I%S/Inputs/preprocess -x c++-module-map %S/Inputs/preprocess/module.modulemap -emit-module -o %t/file.pcm
@@ -50,6 +50,10 @@
 // RUN: %clang_cc1 -fmodules -fmodule-file=%t/file.rewrite.pcm %s -I%t -verify -fno-modules-error-recovery -DFILE_REWRITE
 // RUN: %clang_cc1 -fmodules -fmodule-file=%t/file.rewrite.pcm %s -I%t -verify -fno-modules-error-recovery -DFILE_REWRITE -DINCLUDE -I%S/Inputs/preprocess
 //
+// Check that we can preprocess this user of the .pcm file.
+// RUN: %clang_cc1 -fmodules -fmodule-file=%t/file.pcm %s -I%t -E -frewrite-imports -o %t/preprocess-module.ii
+// RUN: %clang_cc1 -fmodules %t/preprocess-module.ii -verify -fno-modules-error-recovery -DFILE_REWRITE_FULL
+//
 // Check that language / header search options are ignored when preprocessing from a .pcm file.
 // RUN: %clang_cc1 %t/file.pcm -E -frewrite-includes -o %t/file.rewrite.ii.2
 // RUN: cmp %t/file.rewrite.ii %t/file.rewrite.ii.2
@@ -78,12 +82,15 @@
 // REWRITE: #pragma clang module begin file
 // CHECK: # 1 "{{.*}}file.h" 1
 // NO-REWRITE: #pragma clang module begin file
-// NO-REWRITE: # 1 "{{.*}}file.h"{{$}}
 //
-// CHECK: struct __FILE;
+// REWRITE: #ifndef FILE_H
+// REWRITE: #define FILE_H
+//
 // CHECK: #pragma clang module import fwd /* clang {{-E|-frewrite-includes}}: implicit import
 // CHECK: typedef struct __FILE FILE;
 //
+// REWRITE: #endif
+//
 // REWRITE: #pragma clang module end
 // CHECK: # 2 "<module-includes>" 2
 // NO-REWRITE: #pragma clang module end
@@ -105,11 +112,16 @@
 // REWRITE: #pragma clang module begin file
 // CHECK: # 1 "{{.*}}file.h" 1
 // NO-REWRITE: #pragma clang module begin file
-// NO-REWRITE: # 1 "{{.*}}file.h"{{$}}
 //
-// CHECK: struct __FILE;
-// CHECK: #pragma clang module import fwd /* clang {{-E|-frewrite-includes}}: implicit import
-// CHECK: typedef struct __FILE FILE;
+// REWRITE: #ifndef FILE_H
+// REWRITE: #define FILE_H
+// REWRITE: #if 0
+// REWRITE: #include "fwd.h"
+// REWRITE: #endif
+// REWRITE-NOT: #pragma clang module import fwd
+// REWRITE: #endif
+//
+// NO-REWRITE-NOT: struct __FILE;
 //
 // REWRITE: #pragma clang module end
 // CHECK: # 2 "{{.*}}file2.h" 2
@@ -124,15 +136,17 @@
 // NO-REWRITE: #pragma clang module end
 
 
-__FILE *a; // expected-error {{declaration of '__FILE' must be imported}}
+__FILE *a; // expected-error-re {{{{declaration of '__FILE' must be imported|unknown type name '__FILE'}}}}
 #if FILE_REWRITE
-// expected-note@file.rewrite.ii:1 {{here}}
+// expected-note@file.rewrite.ii:* {{here}}
+#elif FILE_REWRITE_FULL
+// No note diagnostic at all in this case: we've built the 'file' module but not loaded it into this compilation yet.
 #elif REWRITE
-// expected-note@rewrite.ii:1 {{here}}
+// expected-note@rewrite.ii:* {{here}}
 #elif COPY
-// expected-note@copy.ii:1 {{here}}
+// expected-note@copy.ii:* {{here}}
 #else
-// expected-note@no-rewrite.ii:1 {{here}}
+// expected-note@no-rewrite.ii:* {{here}}
 #endif
 
 #ifdef INCLUDE
diff --git a/test/Modules/string_names.cpp b/test/Modules/string_names.cpp
index 43068f13c0125..a6503d048d6b8 100644
--- a/test/Modules/string_names.cpp
+++ b/test/Modules/string_names.cpp
@@ -1,6 +1,10 @@
 // RUN: rm -rf %t
 // RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -fmodules-decluse -I %S/Inputs/string_names %s -fmodule-name="my/module-a" -verify
 
+// Check that we can preprocess with string module names.
+// RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs/string_names %s -fmodule-name="my/module-a" -E -frewrite-imports -o %t/test.ii
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-decluse -I %S/Inputs/string_names %t/test.ii -fmodule-name="my/module-a"
+
 #include "a.h"
 #include "b.h" // expected-error {{does not depend on a module exporting}}
 #include "c.h"
diff --git a/test/Parser/objc-at-implementation-eof-crash.m b/test/Parser/objc-at-implementation-eof-crash.m
new file mode 100644
index 0000000000000..76e56c10703e0
--- /dev/null
+++ b/test/Parser/objc-at-implementation-eof-crash.m
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -verify -Wno-objc-root-class %s
+
+@interface ClassA
+
+- (void)fileExistsAtPath:(int)x;
+
+@end
+
+@interface ClassB
+
+@end
+
+@implementation ClassB // expected-note {{implementation started here}}
+
+- (void) method:(ClassA *)mgr { // expected-note {{to match this '{'}}
+  [mgr fileExistsAtPath:0
+} // expected-error {{expected ']'}}
+
+@implementation ClassC //              \
+  // expected-error {{missing '@end'}} \
+  // expected-error {{expected '}'}}   \
+  // expected-warning {{cannot find interface declaration for 'ClassC'}}
+
+@end
diff --git a/test/Parser/objc-at-interface-eof-crash.m b/test/Parser/objc-at-interface-eof-crash.m
new file mode 100644
index 0000000000000..2c7bfd688f06c
--- /dev/null
+++ b/test/Parser/objc-at-interface-eof-crash.m
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -verify -Wno-objc-root-class %s
+
+@interface ClassA
+
+- (void)fileExistsAtPath:(int)x;
+
+@end
+
+@interface ClassB
+
+@end
+
+@implementation ClassB // expected-note {{implementation started here}}
+
+- (void) method:(ClassA *)mgr { // expected-note {{to match this '{'}}
+  [mgr fileExistsAtPath:0
+} // expected-error {{expected ']'}}
+
+@interface ClassC //                   \
+  // expected-error {{missing '@end'}} \
+  // expected-error {{expected '}'}}
+
+@end
diff --git a/test/Sema/asm.c b/test/Sema/asm.c
index e49a1663a8968..04b7cb19eb83a 100644
--- a/test/Sema/asm.c
+++ b/test/Sema/asm.c
@@ -160,6 +160,41 @@ double test15() {
   return ret;
 }
 
+void iOutputConstraint(int x){
+  __asm ("nop" : "=ir" (x) : :); // no-error
+  __asm ("nop" : "=ri" (x) : :); // no-error
+  __asm ("nop" : "=ig" (x) : :); // no-error
+  __asm ("nop" : "=im" (x) : :); // no-error
+  __asm ("nop" : "=imr" (x) : :); // no-error
+  __asm ("nop" : "=i" (x) : :); // expected-error{{invalid output constraint '=i' in asm}}
+  __asm ("nop" : "+i" (x) : :); // expected-error{{invalid output constraint '+i' in asm}}
+  __asm ("nop" : "=ii" (x) : :); // expected-error{{invalid output constraint '=ii' in asm}}
+  __asm ("nop" : "=nr" (x) : :); // no-error
+  __asm ("nop" : "=rn" (x) : :); // no-error
+  __asm ("nop" : "=ng" (x) : :); // no-error
+  __asm ("nop" : "=nm" (x) : :); // no-error
+  __asm ("nop" : "=nmr" (x) : :); // no-error
+  __asm ("nop" : "=n" (x) : :); // expected-error{{invalid output constraint '=n' in asm}}
+  __asm ("nop" : "+n" (x) : :); // expected-error{{invalid output constraint '+n' in asm}}
+  __asm ("nop" : "=nn" (x) : :); // expected-error{{invalid output constraint '=nn' in asm}}
+  __asm ("nop" : "=Fr" (x) : :); // no-error
+  __asm ("nop" : "=rF" (x) : :); // no-error
+  __asm ("nop" : "=Fg" (x) : :); // no-error
+  __asm ("nop" : "=Fm" (x) : :); // no-error
+  __asm ("nop" : "=Fmr" (x) : :); // no-error
+  __asm ("nop" : "=F" (x) : :); // expected-error{{invalid output constraint '=F' in asm}}
+  __asm ("nop" : "+F" (x) : :); // expected-error{{invalid output constraint '+F' in asm}}
+  __asm ("nop" : "=FF" (x) : :); // expected-error{{invalid output constraint '=FF' in asm}}
+  __asm ("nop" : "=Er" (x) : :); // no-error
+  __asm ("nop" : "=rE" (x) : :); // no-error
+  __asm ("nop" : "=Eg" (x) : :); // no-error
+  __asm ("nop" : "=Em" (x) : :); // no-error
+  __asm ("nop" : "=Emr" (x) : :); // no-error
+  __asm ("nop" : "=E" (x) : :); // expected-error{{invalid output constraint '=E' in asm}}
+  __asm ("nop" : "+E" (x) : :); // expected-error{{invalid output constraint '+E' in asm}}
+  __asm ("nop" : "=EE" (x) : :); // expected-error{{invalid output constraint '=EE' in asm}}
+}
+
 // PR19837
 struct foo {
   int a;
diff --git a/test/Sema/overloadable.c b/test/Sema/overloadable.c
index 49d8085651d4d..be9b862f2934f 100644
--- a/test/Sema/overloadable.c
+++ b/test/Sema/overloadable.c
@@ -106,8 +106,8 @@ void fn_type_conversions() {
   void foo(char *c) __attribute__((overloadable));
   void (*ptr1)(void *) = &foo;
   void (*ptr2)(char *) = &foo;
-  void (*ambiguous)(int *) = &foo; // expected-error{{initializing 'void (*)(int *)' with an expression of incompatible type '<overloaded function type>'}} expected-note@105{{candidate function}} expected-note@106{{candidate function}}
-  void *vp_ambiguous = &foo; // expected-error{{initializing 'void *' with an expression of incompatible type '<overloaded function type>'}} expected-note@105{{candidate function}} expected-note@106{{candidate function}}
+  void (*ambiguous)(int *) = &foo; // expected-error{{initializing 'void (*)(int *)' with an expression of incompatible type '<overloaded function type>'}} expected-note@-4{{candidate function}} expected-note@-3{{candidate function}}
+  void *vp_ambiguous = &foo; // expected-error{{initializing 'void *' with an expression of incompatible type '<overloaded function type>'}} expected-note@-5{{candidate function}} expected-note@-4{{candidate function}}
 
   void (*specific1)(int *) = (void (*)(void *))&foo; // expected-warning{{incompatible function pointer types initializing 'void (*)(int *)' with an expression of type 'void (*)(void *)'}}
   void *specific2 = (void (*)(void *))&foo;
@@ -117,8 +117,8 @@ void fn_type_conversions() {
   void disabled(char *c) __attribute__((overloadable, enable_if(1, "The function name lies.")));
   // To be clear, these should all point to the last overload of 'disabled'
   void (*dptr1)(char *c) = &disabled;
-  void (*dptr2)(void *c) = &disabled; // expected-warning{{incompatible pointer types initializing 'void (*)(void *)' with an expression of type '<overloaded function type>'}} expected-note@115{{candidate function made ineligible by enable_if}} expected-note@116{{candidate function made ineligible by enable_if}} expected-note@117{{candidate function has type mismatch at 1st parameter (expected 'void *' but has 'char *')}}
-  void (*dptr3)(int *c) = &disabled; // expected-warning{{incompatible pointer types initializing 'void (*)(int *)' with an expression of type '<overloaded function type>'}} expected-note@115{{candidate function made ineligible by enable_if}} expected-note@116{{candidate function made ineligible by enable_if}} expected-note@117{{candidate function has type mismatch at 1st parameter (expected 'int *' but has 'char *')}}
+  void (*dptr2)(void *c) = &disabled; // expected-warning{{incompatible pointer types initializing 'void (*)(void *)' with an expression of type '<overloaded function type>'}} expected-note@-5{{candidate function made ineligible by enable_if}} expected-note@-4{{candidate function made ineligible by enable_if}} expected-note@-3{{candidate function has type mismatch at 1st parameter (expected 'void *' but has 'char *')}}
+  void (*dptr3)(int *c) = &disabled; // expected-warning{{incompatible pointer types initializing 'void (*)(int *)' with an expression of type '<overloaded function type>'}} expected-note@-6{{candidate function made ineligible by enable_if}} expected-note@-5{{candidate function made ineligible by enable_if}} expected-note@-4{{candidate function has type mismatch at 1st parameter (expected 'int *' but has 'char *')}}
 
   void *specific_disabled = &disabled;
 }
@@ -131,14 +131,14 @@ void incompatible_pointer_type_conversions() {
   void foo(char *c) __attribute__((overloadable));
   void foo(short *c) __attribute__((overloadable));
   foo(charbuf);
-  foo(ucharbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@131{{candidate function}} expected-note@132{{candidate function}}
-  foo(intbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@131{{candidate function}} expected-note@132{{candidate function}}
+  foo(ucharbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@-3{{candidate function}} expected-note@-2{{candidate function}}
+  foo(intbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@-4{{candidate function}} expected-note@-3{{candidate function}}
 
   void bar(unsigned char *c) __attribute__((overloadable));
   void bar(signed char *c) __attribute__((overloadable));
-  bar(charbuf); // expected-error{{call to 'bar' is ambiguous}} expected-note@137{{candidate function}} expected-note@138{{candidate function}}
+  bar(charbuf); // expected-error{{call to 'bar' is ambiguous}} expected-note@-2{{candidate function}} expected-note@-1{{candidate function}}
   bar(ucharbuf);
-  bar(intbuf); // expected-error{{call to 'bar' is ambiguous}} expected-note@137{{candidate function}} expected-note@138{{candidate function}}
+  bar(intbuf); // expected-error{{call to 'bar' is ambiguous}} expected-note@-4{{candidate function}} expected-note@-3{{candidate function}}
 }
 
 void dropping_qualifiers_is_incompatible() {
@@ -148,8 +148,8 @@ void dropping_qualifiers_is_incompatible() {
   void foo(char *c) __attribute__((overloadable));
   void foo(const volatile unsigned char *c) __attribute__((overloadable));
 
-  foo(ccharbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@148{{candidate function}} expected-note@149{{candidate function}}
-  foo(vcharbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@148{{candidate function}} expected-note@149{{candidate function}}
+  foo(ccharbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@-3{{candidate function}} expected-note@-2{{candidate function}}
+  foo(vcharbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@-4{{candidate function}} expected-note@-3{{candidate function}}
 }
 
 // Bug: we used to treat `__typeof__(foo)` as though it was `__typeof__(&foo)`
diff --git a/test/SemaCXX/PR16677.cpp b/test/SemaCXX/PR16677.cpp
index 7140ac79f0893..efa4faaacd693 100644
--- a/test/SemaCXX/PR16677.cpp
+++ b/test/SemaCXX/PR16677.cpp
@@ -10,7 +10,6 @@ class Base { };
 template<class T,  // Should be angle bracket instead of comma
 class Derived : public Base<T> { // expected-error{{'Derived' cannot be defined in a type specifier}}
   Class_With_Destructor member;
-}; // expected-error{{a non-type template parameter cannot have type 'class Derived'}}
-   // expected-error@-1{{expected ',' or '>' in template-parameter-list}}
-   // expected-warning@-2{{declaration does not declare anything}}
+}; // expected-error{{expected ',' or '>' in template-parameter-list}}
+   // expected-warning@-1{{declaration does not declare anything}}
 
diff --git a/test/SemaCXX/cxx1y-deduced-return-type.cpp b/test/SemaCXX/cxx1y-deduced-return-type.cpp
index bfe0ab9dcdbc2..13ff751acae4d 100644
--- a/test/SemaCXX/cxx1y-deduced-return-type.cpp
+++ b/test/SemaCXX/cxx1y-deduced-return-type.cpp
@@ -55,6 +55,25 @@ auto b(bool k) {
   return "goodbye";
 }
 
+// Allow 'operator auto' to call only the explicit operator auto.
+struct BothOps {
+  template <typename T> operator T();
+  template <typename T> operator T *();
+  operator auto() { return 0; }
+  operator auto *() { return this; }
+};
+struct JustTemplateOp {
+  template <typename T> operator T();
+  template <typename T> operator T *();
+};
+
+auto c() {
+  BothOps().operator auto(); // ok
+  BothOps().operator auto *(); // ok
+  JustTemplateOp().operator auto(); // expected-error {{no member named 'operator auto' in 'JustTemplateOp'}}
+  JustTemplateOp().operator auto *(); // expected-error {{no member named 'operator auto *' in 'JustTemplateOp'}}
+}
+
 auto *ptr_1() {
   return 100; // expected-error {{cannot deduce return type 'auto *' from returned value of type 'int'}}
 }
diff --git a/test/SemaCXX/cxx1z-noexcept-function-type.cpp b/test/SemaCXX/cxx1z-noexcept-function-type.cpp
index 40dc3a22e530b..524ea8e53c10b 100644
--- a/test/SemaCXX/cxx1z-noexcept-function-type.cpp
+++ b/test/SemaCXX/cxx1z-noexcept-function-type.cpp
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -std=c++14 -verify -fexceptions -fcxx-exceptions %s
 // RUN: %clang_cc1 -std=c++1z -verify -fexceptions -fcxx-exceptions %s -Wno-dynamic-exception-spec
+// RUN: %clang_cc1 -std=c++14 -verify -fexceptions -fcxx-exceptions -Wno-c++1z-compat-mangling -DNO_COMPAT_MANGLING %s
 
 #if __cplusplus > 201402L
 
@@ -81,7 +82,7 @@ namespace CompatWarning {
   auto f5() -> void (*)() throw();
   auto f6() -> void (&)() throw();
   auto f7() -> void (X::*)() throw();
-#if __cplusplus <= 201402L
+#if __cplusplus <= 201402L && !defined(NO_COMPAT_MANGLING)
   // expected-warning@-8 {{mangled name of 'f1' will change in C++17 due to non-throwing exception specification in function signature}}
   // expected-warning@-8 {{mangled name of 'f2' will change in C++17 due to non-throwing exception specification in function signature}}
   // expected-warning@-8 {{mangled name of 'f3' will change in C++17 due to non-throwing exception specification in function signature}}
diff --git a/test/SemaCXX/friend2.cpp b/test/SemaCXX/friend2.cpp
index 347af0d61b1bc..d1d4b628ba2d9 100644
--- a/test/SemaCXX/friend2.cpp
+++ b/test/SemaCXX/friend2.cpp
@@ -170,3 +170,40 @@ struct Test {
 template class Test<int>;
 
 }
+
+namespace pr14785 {
+template<typename T>
+struct Somewhat {
+  void internal() const { }
+  friend void operator+(int const &, Somewhat<T> const &) {}  // expected-error{{redefinition of 'operator+'}}
+};
+
+void operator+(int const &, Somewhat<char> const &x) {  // expected-note {{previous definition is here}}
+  x.internal();  // expected-note{{in instantiation of template class 'pr14785::Somewhat<char>' requested here}}
+}
+}
+
+namespace D30375 {
+template <typename K> struct B {
+  template <typename A> bool insert(A &);
+};
+
+template <typename K>
+template <typename A> bool B<K>::insert(A &x) { return x < x; }
+
+template <typename K> class D {
+  B<K> t;
+
+public:
+  K x;
+  bool insert() { return t.insert(x); }
+  template <typename K1> friend bool operator<(const D<K1> &, const D<K1> &);
+};
+
+template <typename K> bool operator<(const D<K> &, const D<K> &);
+
+void func() {
+  D<D<int>> cache;
+  cache.insert();
+}
+}
diff --git a/test/SemaCXX/invalid-template-params.cpp b/test/SemaCXX/invalid-template-params.cpp
new file mode 100644
index 0000000000000..0c463fe13d5b0
--- /dev/null
+++ b/test/SemaCXX/invalid-template-params.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s
+
+template<class> class Foo {
+  template<class UBar // expected-error {{expected ';' after class}}
+                      // expected-note@-1 {{'UBar' declared here}}
+  void foo1(); // expected-error {{a non-type template parameter cannot have type 'class UBar'}}
+               // expected-error@-1 {{expected ',' or '>' in template-parameter-list}}
+               // expected-warning@-2 {{declaration does not declare anything}}
+};
+
+Foo<int>::UBar g1; // expected-error {{no type named 'UBar' in 'Foo<int>'}}
+
+class C0 {
+public:
+  template<typename T0, typename T1 = T0 // missing closing angle bracket
+  struct S0 {}; // expected-error {{'S0' cannot be defined in a type specifier}}
+                // expected-error@-1 {{cannot combine with previous 'type-name' declaration specifier}}
+                // expected-error@-2 {{expected ',' or '>' in template-parameter-list}}
+                // expected-warning@-3 {{declaration does not declare anything}}
+  C0() : m(new S0<int>) {} // expected-error {{expected '(' for function-style cast or type construction}}
+                           // expected-error@-1 {{expected expression}}
+  S0<int> *m; // expected-error {{expected member name or ';' after declaration specifiers}}
+};
diff --git a/test/SemaCXX/warn-throw-out-noexcept-func.cpp b/test/SemaCXX/warn-throw-out-noexcept-func.cpp
new file mode 100644
index 0000000000000..dfd1ff9065ab9
--- /dev/null
+++ b/test/SemaCXX/warn-throw-out-noexcept-func.cpp
@@ -0,0 +1,265 @@
+// RUN: %clang_cc1 %s  -fdelayed-template-parsing -fcxx-exceptions -fsyntax-only -Wexceptions -verify -std=c++11
+struct A_ShouldDiag {
+  ~A_ShouldDiag(); // implicitly noexcept(true)
+};
+A_ShouldDiag::~A_ShouldDiag() { // expected-note {{destructor or deallocator has a (possibly implicit) non-throwing excepton specification}}
+  throw 1; // expected-warning {{has a non-throwing exception specification but can still throw, resulting in unexpected program termination}}
+}
+struct B_ShouldDiag {
+  int i;
+  ~B_ShouldDiag() noexcept(true) {} //no disg, no throw stmt
+};
+struct R_ShouldDiag : A_ShouldDiag {
+  B_ShouldDiag b;
+  ~R_ShouldDiag() {     // expected-note  {{destructor or deallocator has a}}
+    throw 1; // expected-warning {{has a non-throwing exception specification but}}
+  }
+};
+
+struct M_ShouldNotDiag {
+  B_ShouldDiag b;
+  ~M_ShouldNotDiag() noexcept(false);
+};
+
+M_ShouldNotDiag::~M_ShouldNotDiag() noexcept(false) {
+  throw 1;
+}
+
+struct N_ShouldDiag {
+  B_ShouldDiag b;
+  ~N_ShouldDiag(); //implicitly noexcept(true)
+};
+
+N_ShouldDiag::~N_ShouldDiag() {  // expected-note  {{destructor or deallocator has a}}
+  throw 1; // expected-warning {{has a non-throwing exception specification but}}
+}
+struct X_ShouldDiag {
+  B_ShouldDiag b;
+  ~X_ShouldDiag() noexcept { // expected-note  {{destructor or deallocator has a}}
+    throw 1;      // expected-warning {{has a non-throwing exception specification but}}
+  }
+};
+struct Y_ShouldDiag : A_ShouldDiag {
+  ~Y_ShouldDiag() noexcept(true) { // expected-note  {{destructor or deallocator has a}}
+    throw 1;            // expected-warning {{has a non-throwing exception specification but}}
+  }
+};
+struct C_ShouldNotDiag {
+  int i;
+  ~C_ShouldNotDiag() noexcept(false) {}
+};
+struct D_ShouldNotDiag {
+  C_ShouldNotDiag c;
+  ~D_ShouldNotDiag() { //implicitly noexcept(false)
+    throw 1;
+  }
+};
+struct E_ShouldNotDiag  {
+  C_ShouldNotDiag c;
+  ~E_ShouldNotDiag(); //implicitly noexcept(false)
+};
+E_ShouldNotDiag::~E_ShouldNotDiag() //implicitly noexcept(false)
+{
+  throw 1;
+}
+
+template <typename T>
+class A1_ShouldDiag {
+  T b;
+
+public:
+  ~A1_ShouldDiag() {    // expected-note  {{destructor or deallocator has a}}
+    throw 1; // expected-warning {{has a non-throwing exception specification but}}
+  }
+};
+template <typename T>
+struct B1_ShouldDiag {
+  T i;
+  ~B1_ShouldDiag() noexcept(true) {}
+};
+template <typename T>
+struct R1_ShouldDiag : A1_ShouldDiag<T> //expected-note {{in instantiation of member function}}
+{
+  B1_ShouldDiag<T> b;
+  ~R1_ShouldDiag() {    // expected-note  {{destructor or deallocator has a}}
+    throw 1; // expected-warning {{has a non-throwing exception specification but}}
+  }
+};
+template <typename T>
+struct S1_ShouldDiag : A1_ShouldDiag<T> {
+  B1_ShouldDiag<T> b;
+  ~S1_ShouldDiag() noexcept { // expected-note  {{destructor or deallocator has a}}
+    throw 1;       // expected-warning {{has a non-throwing exception specification but}}
+  }
+};
+void operator delete(void *ptr) noexcept { // expected-note  {{destructor or deallocator has a}}
+  throw 1;                                 // expected-warning {{has a non-throwing exception specification but}}
+}
+struct except_fun {
+  static const bool i = false;
+};
+struct noexcept_fun {
+  static const bool i = true;
+};
+template <typename T>
+struct dependent_warn {
+  ~dependent_warn() noexcept(T::i) {
+    throw 1;
+  }
+};
+template <typename T>
+struct dependent_warn_noexcept {
+  ~dependent_warn_noexcept() noexcept(T::i) { // expected-note  {{destructor or deallocator has a}}
+    throw 1;                                  // expected-warning {{has a non-throwing exception specification but}}
+  }
+};
+template <typename T>
+struct dependent_warn_both {
+  ~dependent_warn_both() noexcept(T::i) { // expected-note  {{destructor or deallocator has a}}
+    throw 1;                              // expected-warning {{has a non-throwing exception specification but}}
+  }
+};
+void foo() noexcept { //expected-note {{non-throwing function declare here}}
+  throw 1;            // expected-warning {{has a non-throwing exception specification but}}
+}
+struct Throws {
+  ~Throws() noexcept(false);
+};
+
+struct ShouldDiagnose {
+  Throws T;
+  ~ShouldDiagnose() noexcept { //expected-note {{destructor or deallocator has a}}
+    throw; // expected-warning {{has a non-throwing exception specification but}}
+  }
+};
+struct ShouldNotDiagnose {
+  Throws T;
+  ~ShouldNotDiagnose() { 
+    throw; 
+  }
+};
+
+void bar_ShouldNotDiag() noexcept {
+  try {
+    throw 1;
+  } catch (...) {
+  }
+}
+void f_ShouldNotDiag() noexcept {
+  try {
+    throw 12;
+  } catch (int) {
+  }
+}
+void g_ShouldNotDiag() noexcept {
+  try {
+    throw 12;
+  } catch (...) {
+  }
+}
+
+void h_ShouldDiag() noexcept { //expected-note {{non-throwing function declare here}}
+  try {
+    throw 12; // expected-warning {{has a non-throwing exception specification but}}
+  } catch (const char *) {
+  }
+}
+
+void i_ShouldDiag() noexcept { //expected-note {{non-throwing function declare here}}
+  try {
+    throw 12;
+  } catch (int) {
+    throw; // expected-warning {{has a non-throwing exception specification but}}
+  }
+}
+void j_ShouldDiag() noexcept { //expected-note {{non-throwing function declare here}}
+  try {
+    throw 12;
+  } catch (int) {
+    throw "haha"; // expected-warning {{has a non-throwing exception specification but}}
+  }
+}
+
+void k_ShouldDiag() noexcept { //expected-note {{non-throwing function declare here}}
+  try {
+    throw 12;
+  } catch (...) {
+    throw; // expected-warning {{has a non-throwing exception specification but}}
+  }
+}
+
+void loo_ShouldDiag(int i) noexcept { //expected-note {{non-throwing function declare here}}
+  if (i)
+    try {
+      throw 12;
+    } catch (int) {
+      throw "haha"; //expected-warning {{has a non-throwing exception specification but}}
+    }
+  i = 10;
+}
+
+void loo1_ShouldNotDiag() noexcept {
+  if (0)
+    throw 12;
+}
+
+void loo2_ShouldDiag() noexcept { //expected-note {{non-throwing function declare here}}
+  if (1)
+    throw 12; // expected-warning {{has a non-throwing exception specification but}}
+}
+struct S {};
+
+void l_ShouldDiag() noexcept { //expected-note {{non-throwing function declare here}}
+  try {
+    throw S{}; //expected-warning {{has a non-throwing exception specification but}}
+  } catch (S *s) {
+  }
+}
+
+void m_ShouldNotDiag() noexcept {
+  try {
+    const S &s = S{};
+    throw s;
+  } catch (S s) {
+  }
+
+}
+void n_ShouldNotDiag() noexcept {
+  try {
+    S s = S{};
+    throw s;
+  } catch (const S &s) {
+  }
+}
+void o_ShouldDiag() noexcept { //expected-note {{non-throwing function declare here}}
+  try {
+    throw; //expected-warning {{has a non-throwing exception specification but}}
+  } catch (...) {
+  }
+}
+
+#define NOEXCEPT noexcept
+void with_macro() NOEXCEPT { //expected-note {{non-throwing function declare here}}
+  throw 1; // expected-warning {{has a non-throwing exception specification but}}
+}
+
+void with_try_block() try {
+  throw 2;
+} catch (...) {
+}
+
+void with_try_block1() noexcept try { //expected-note {{non-throwing function declare here}}
+  throw 2;  // expected-warning {{has a non-throwing exception specification but}}
+} catch (char *) {
+}
+
+int main() {
+  R1_ShouldDiag<int> o; //expected-note {{in instantiation of member function}}
+  S1_ShouldDiag<int> b; //expected-note {{in instantiation of member function}}
+  dependent_warn<except_fun> f;
+  dependent_warn_noexcept<noexcept_fun> f1; //expected-note {{in instantiation of member function}}
+  dependent_warn_both<except_fun> f2;
+  dependent_warn_both<noexcept_fun> f3; //expected-note {{in instantiation of member function}}
+  ShouldDiagnose obj;
+  ShouldNotDiagnose obj1;
+}
diff --git a/test/SemaObjC/unguarded-availability-new.m b/test/SemaObjC/unguarded-availability-new.m
new file mode 100644
index 0000000000000..33baedebc279f
--- /dev/null
+++ b/test/SemaObjC/unguarded-availability-new.m
@@ -0,0 +1,160 @@
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.13 -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -xobjective-c++ -DMAC -triple x86_64-apple-macosx10.13 -fblocks -fsyntax-only -verify %s
+
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.13 -Wunguarded-availability-new -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.13 -Wno-unguarded-availability-new -DNO_WARNING -fblocks -fsyntax-only -verify %s
+
+// unguarded-availability implies unguarded-availability-new:
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.13 -Wunguarded-availability -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.11 -Wunguarded-availability -Wno-unguarded-availability-new -DNO_WARNING -DWARN_PREV -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.13 -Wno-unguarded-availability -DNO_WARNING  -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.13 -Wno-unguarded-availability -Wunguarded-availability-new -fblocks -fsyntax-only -verify %s
+
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.13 -D TEST_FUNC_CURRENT -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.13 -D TEST_FUNC_NEXT -DNO_WARNING -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-ios11 -DNO_WARNING -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DMAC -triple x86_64-apple-macosx10.12 -DWARN_CURRENT -fblocks -fsyntax-only -verify %s
+
+// RUN: %clang_cc1 -DIOS -triple x86_64-apple-ios11 -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DIOS -triple x86_64-apple-ios11 -D TEST_FUNC_CURRENT -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DIOS -triple x86_64-apple-ios11 -D TEST_FUNC_NEXT -DNO_WARNING -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DIOS -triple x86_64-apple-ios10.3 -DWARN_CURRENT -fblocks -fsyntax-only -verify %s
+
+// RUN: %clang_cc1 -DTVOS -triple x86_64-apple-tvos11 -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DTVOS -triple x86_64-apple-tvos11 -D TEST_FUNC_CURRENT -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DTVOS -triple x86_64-apple-tvos11 -D TEST_FUNC_NEXT -DNO_WARNING -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DTVOS -triple x86_64-apple-tvos10 -DWARN_CURRENT -fblocks -fsyntax-only -verify %s
+
+// RUN: %clang_cc1 -DWATCHOS -triple i386-apple-watchos4 -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DWATCHOS -triple i386-apple-watchos4 -D TEST_FUNC_CURRENT -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DWATCHOS -triple i386-apple-watchos4 -D TEST_FUNC_NEXT -DNO_WARNING -fblocks -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DWATCHOS -triple i386-apple-watchos3 -DWARN_CURRENT -fblocks -fsyntax-only -verify %s
+
+#ifdef MAC
+#define PLATFORM macos
+#define NEXT 10.14
+
+#define AVAILABLE_PREV __attribute__((availability(macos, introduced = 10.12)))
+#define AVAILABLE_CURRENT __attribute__((availability(macos, introduced = 10.13)))
+#define AVAILABLE_NEXT __attribute__((availability(macos, introduced = 10.14)))
+#endif
+
+#ifdef IOS
+#define PLATFORM ios
+#define NEXT 12
+
+#define AVAILABLE_PREV __attribute__((availability(ios, introduced = 10)))
+#define AVAILABLE_CURRENT __attribute__((availability(ios, introduced = 11)))
+#define AVAILABLE_NEXT __attribute__((availability(ios, introduced = 12)))
+#endif
+
+#ifdef TVOS
+#define PLATFORM tvos
+#define NEXT 13
+
+#define AVAILABLE_PREV __attribute__((availability(tvos, introduced = 10)))
+#define AVAILABLE_CURRENT __attribute__((availability(tvos, introduced = 11)))
+#define AVAILABLE_NEXT __attribute__((availability(tvos, introduced = 13)))
+#endif
+
+#ifdef WATCHOS
+#define PLATFORM watchos
+#define NEXT 5
+
+#define AVAILABLE_PREV __attribute__((availability(watchos, introduced = 3)))
+#define AVAILABLE_CURRENT __attribute__((availability(watchos, introduced = 4)))
+#define AVAILABLE_NEXT __attribute__((availability(watchos, introduced = 5)))
+#endif
+
+void previouslyAvailable() AVAILABLE_PREV;
+#ifdef WARN_PREV
+  // expected-note@-2 {{'previouslyAvailable' has been explicitly marked partial here}}
+#endif
+void currentlyAvailable() AVAILABLE_CURRENT;
+#ifdef WARN_CURRENT
+  // expected-note@-2 {{'currentlyAvailable' has been explicitly marked partial here}}
+#endif
+void willBeAvailabile() AVAILABLE_NEXT;
+#ifndef NO_WARNING
+  // expected-note@-2 {{'willBeAvailabile' has been explicitly marked partial here}}
+#endif
+
+#ifdef TEST_FUNC_CURRENT
+#define FUNC_AVAILABLE AVAILABLE_CURRENT
+#endif
+#ifdef TEST_FUNC_NEXT
+#define FUNC_AVAILABLE AVAILABLE_NEXT
+#endif
+#ifndef FUNC_AVAILABLE
+#define FUNC_AVAILABLE
+#endif
+
+typedef int AVAILABLE_NEXT new_int;
+#ifndef NO_WARNING
+  // expected-note@-2 {{'new_int' has been explicitly marked partial here}}
+#endif
+FUNC_AVAILABLE new_int x;
+#ifndef NO_WARNING
+#ifdef MAC
+  // expected-warning@-3 {{'new_int' is partial: introduced in macOS 10.14}} expected-note@-3 {{explicitly redeclare 'new_int' to silence this warning}}
+#endif
+#ifdef IOS
+  // expected-warning@-6 {{'new_int' is partial: introduced in iOS 12}} expected-note@-6 {{explicitly redeclare 'new_int' to silence this warning}}
+#endif
+#ifdef TVOS
+  // expected-warning@-9 {{'new_int' is partial: introduced in tvOS 13}} expected-note@-9 {{explicitly redeclare 'new_int' to silence this warning}}
+#endif
+#ifdef WATCHOS
+  // expected-warning@-12 {{'new_int' is partial: introduced in watchOS 5}} expected-note@-12 {{explicitly redeclare 'new_int' to silence this warning}}
+#endif
+#endif
+
+void test() FUNC_AVAILABLE {
+  previouslyAvailable();
+#ifdef WARN_PREV
+#ifdef MAC
+  // expected-warning@-3 {{'previouslyAvailable' is only available on macOS 10.12 or newer}}
+#endif
+  // expected-note@-5 {{enclose 'previouslyAvailable' in an @available check to silence this warning}}
+#endif
+  currentlyAvailable();
+#ifdef WARN_CURRENT
+#ifdef MAC
+  // expected-warning@-3 {{'currentlyAvailable' is only available on macOS 10.13 or newer}}
+#endif
+#ifdef IOS
+  // expected-warning@-6 {{'currentlyAvailable' is only available on iOS 11 or newer}}
+#endif
+#ifdef TVOS
+  // expected-warning@-9 {{'currentlyAvailable' is only available on tvOS 11 or newer}}
+#endif
+#ifdef WATCHOS
+  // expected-warning@-12 {{'currentlyAvailable' is only available on watchOS 4 or newer}}
+#endif
+  // expected-note@-14 {{enclose 'currentlyAvailable' in an @available check to silence this warning}}
+#endif
+  willBeAvailabile();
+#ifndef NO_WARNING
+#ifdef MAC
+  // expected-warning@-3 {{'willBeAvailabile' is only available on macOS 10.14 or newer}}
+#endif
+#ifdef IOS
+  // expected-warning@-6 {{'willBeAvailabile' is only available on iOS 12 or newer}}
+#endif
+#ifdef TVOS
+  // expected-warning@-9 {{'willBeAvailabile' is only available on tvOS 13 or newer}}
+#endif
+#ifdef WATCHOS
+  // expected-warning@-12 {{'willBeAvailabile' is only available on watchOS 5 or newer}}
+#endif
+  // expected-note@-14 {{enclose 'willBeAvailabile' in an @available check to silence this warning}}
+#endif
+  if (@available(PLATFORM NEXT, *))
+    willBeAvailabile(); // OK
+}
+
+#ifdef NO_WARNING
+#ifndef WARN_PREV
+// expected-no-diagnostics
+#endif
+#endif
diff --git a/test/SemaOpenCL/storageclass.cl b/test/SemaOpenCL/storageclass.cl
index e611313b4a70b..9a461068f2375 100644
--- a/test/SemaOpenCL/storageclass.cl
+++ b/test/SemaOpenCL/storageclass.cl
@@ -13,6 +13,11 @@ void kernel foo(int x) {
   constant int L1 = 0;
   local int L2;
 
+  if (true) {
+    local int L1; // expected-error {{variables in the local address space can only be declared in the outermost scope of a kernel function}}
+    constant int L1 = 42; // expected-error {{variables in the constant address space can only be declared in the outermost scope of a kernel function}}
+  }
+
   auto int L3 = 7;                            // expected-error{{OpenCL version 1.2 does not support the 'auto' storage class specifier}}
   global int L4;                              // expected-error{{function scope variable cannot be declared in global address space}}
   __attribute__((address_space(100))) int L5; // expected-error{{automatic variable qualified with an invalid address space}}
author	Dimitry Andric <dim@FreeBSD.org>	2017-06-26 20:33:12 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-06-26 20:33:12 +0000
commit	ef915aab0ac566c55bfb0d7a9f6635bb5d94d4af (patch)
tree	ac935cfa19985d33098fc13e288b5ac830672dba /test
parent	325377b57338e700317f5e423e5b0f1c08d99a39 (diff)